Posted to commits@hive.apache.org by na...@apache.org on 2010/06/09 03:37:49 UTC
svn commit: r952877 [1/2] - in /hadoop/hive/trunk: ./
common/src/java/org/apache/hadoop/hive/conf/ conf/ metastore/if/
metastore/src/gen-cpp/
metastore/src/gen-javabean/org/apache/hadoop/hive/metastore/api/
metastore/src/gen-php/ metastore/src/gen-py/h...
Author: namit
Date: Wed Jun 9 01:37:48 2010
New Revision: 952877
URL: http://svn.apache.org/viewvc?rev=952877&view=rev
Log:
HIVE-1332. Support for archiving partitions
(Paul Yang via namit)
Added:
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableSimpleDesc.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveDesc.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveWork.java
hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive1.q
hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive2.q
hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive3.q
hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive4.q
hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive5.q
hadoop/hive/trunk/ql/src/test/queries/clientpositive/archive.q
hadoop/hive/trunk/ql/src/test/results/clientnegative/archive1.q.out
hadoop/hive/trunk/ql/src/test/results/clientnegative/archive2.q.out
hadoop/hive/trunk/ql/src/test/results/clientnegative/archive3.q.out
hadoop/hive/trunk/ql/src/test/results/clientnegative/archive4.q.out
hadoop/hive/trunk/ql/src/test/results/clientnegative/archive5.q.out
hadoop/hive/trunk/ql/src/test/results/clientpositive/archive.q.out
hadoop/hive/trunk/shims/src/0.20/java/org/apache/hadoop/hive/shims/HiveHarFileSystem.java
Modified:
hadoop/hive/trunk/CHANGES.txt
hadoop/hive/trunk/build-common.xml
hadoop/hive/trunk/build.properties
hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
hadoop/hive/trunk/conf/hive-default.xml
hadoop/hive/trunk/metastore/if/hive_metastore.thrift
hadoop/hive/trunk/metastore/src/gen-cpp/hive_metastore_constants.cpp
hadoop/hive/trunk/metastore/src/gen-cpp/hive_metastore_constants.h
hadoop/hive/trunk/metastore/src/gen-javabean/org/apache/hadoop/hive/metastore/api/Constants.java
hadoop/hive/trunk/metastore/src/gen-php/hive_metastore_constants.php
hadoop/hive/trunk/metastore/src/gen-py/hive_metastore/constants.py
hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ErrorMsg.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/TouchDesc.java
hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java
hadoop/hive/trunk/ql/src/test/templates/TestCliDriver.vm
hadoop/hive/trunk/ql/src/test/templates/TestNegativeCliDriver.vm
hadoop/hive/trunk/shims/build.xml
hadoop/hive/trunk/shims/src/0.17/java/org/apache/hadoop/hive/shims/Hadoop17Shims.java
hadoop/hive/trunk/shims/src/0.18/java/org/apache/hadoop/hive/shims/Hadoop18Shims.java
hadoop/hive/trunk/shims/src/0.19/java/org/apache/hadoop/hive/shims/Hadoop19Shims.java
hadoop/hive/trunk/shims/src/0.20/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java
hadoop/hive/trunk/shims/src/common/java/org/apache/hadoop/hive/shims/HadoopShims.java
hadoop/hive/trunk/shims/src/common/java/org/apache/hadoop/hive/shims/ShimLoader.java
Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Wed Jun 9 01:37:48 2010
@@ -84,6 +84,9 @@ Trunk - Unreleased
HIVE-1351. rcfilecat for debugging
(Yongqiang He via namit)
+ HIVE-1332. Support for archiving partitions
+ (Paul Yang via namit)
+
IMPROVEMENTS
HIVE-983. Function from_unixtime takes long.
(Ning Zhang via zshao)
Modified: hadoop/hive/trunk/build-common.xml
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/build-common.xml?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/build-common.xml (original)
+++ hadoop/hive/trunk/build-common.xml Wed Jun 9 01:37:48 2010
@@ -203,6 +203,7 @@
<!-- the normal classpath -->
<path id="common-classpath">
<pathelement location="${hadoop.jar}"/>
+ <pathelement location="${hadoop.tools.jar}"/>
<pathelement location="${build.dir.hive}/classes"/>
<fileset dir="${build.dir.hive}" includes="*/*.jar"/>
<fileset dir="${hive.root}/lib" includes="*.jar"/>
Modified: hadoop/hive/trunk/build.properties
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/build.properties?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/build.properties (original)
+++ hadoop/hive/trunk/build.properties Wed Jun 9 01:37:48 2010
@@ -21,6 +21,7 @@ hadoop.version.ant-internal=${hadoop.ver
hadoop.root.default=${build.dir.hadoop}/hadoop-${hadoop.version.ant-internal}
hadoop.root=${hadoop.root.default}
hadoop.jar=${hadoop.root}/hadoop-${hadoop.version.ant-internal}-core.jar
+hadoop.tools.jar=${hadoop.root}/hadoop-${hadoop.version.ant-internal}-tools.jar
hadoop.test.jar=${hadoop.root}/hadoop-${hadoop.version.ant-internal}-test.jar
jetty.test.jar=${hadoop.root}/lib/jetty-5.1.4.jar
servlet.test.jar=${hadoop.root}/lib/servlet-api.jar
Modified: hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Wed Jun 9 01:37:48 2010
@@ -124,9 +124,17 @@ public class HiveConf extends Configurat
// datastore. Once reloaded, this value is reset to false. Used for
// testing only.
METASTOREFORCERELOADCONF("hive.metastore.force.reload.conf", false),
-
METASTORESERVERMINTHREADS("hive.metastore.server.min.threads", 200),
METASTORESERVERMAXTHREADS("hive.metastore.server.max.threads", Integer.MAX_VALUE),
+ // Intermediate dir suffixes used for archiving. Not important what they
+ // are, as long as collisions are avoided
+ METASTORE_INT_ORIGINAL("hive.metastore.archive.intermediate.original",
+ "_INTERMEDIATE_ORIGINAL"),
+ METASTORE_INT_ARCHIVED("hive.metastore.archive.intermediate.archived",
+ "_INTERMEDIATE_ARCHIVED"),
+ METASTORE_INT_EXTRACTED("hive.metastore.archive.intermediate.extracted",
+ "_INTERMEDIATE_EXTRACTED"),
+
// CLI
CLIIGNOREERRORS("hive.cli.errors.ignore", false),
@@ -238,6 +246,10 @@ public class HiveConf extends Configurat
HIVEOPTBUCKETMAPJOIN("hive.optimize.bucketmapjoin", false), // optimize bucket map join
HIVEOPTSORTMERGEBUCKETMAPJOIN("hive.optimize.bucketmapjoin.sortedmerge", false), // try to use sorted merge bucket map join
HIVEOPTREDUCEDEDUPLICATION("hive.optimize.reducededuplication", true),
+
+ // For har files
+ HIVEARCHIVEENABLED("hive.archive.enabled", false),
+ HIVEHARPARENTDIRSETTABLE("hive.archive.har.parentdir.settable", false),
;
public final String varname;
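
A note on the mechanics: DDLTask builds these intermediate directories as siblings of the partition directory, via new Path(originalDir.getParent(), originalDir.getName() + suffix). With the default suffixes above and a hypothetical partition at hdfs://nn:8020/warehouse/t/ds=1/hr=12, the directories involved would be:

    // Illustrative paths only; not part of the patch.
    // .../ds=1/hr=12_INTERMEDIATE_ORIGINAL   (renamed original partition data)
    // .../ds=1/hr=12_INTERMEDIATE_ARCHIVED   (the freshly built data.har)
    // .../ds=1/hr=12_INTERMEDIATE_EXTRACTED  (unarchive: the extracted copy)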
Modified: hadoop/hive/trunk/conf/hive-default.xml
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/conf/hive-default.xml?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/conf/hive-default.xml (original)
+++ hadoop/hive/trunk/conf/hive-default.xml Wed Jun 9 01:37:48 2010
@@ -563,4 +563,24 @@
<description>The default partition name in case the dynamic partition column value is null/empty string or any other value that cannot be escaped. This value must not contain any special character used in HDFS URI (e.g., ':', '%', '/' etc). The user has to be aware that the dynamic partition value should not contain this value, to avoid confusion.</description>
</property>
+<property>
+ <name>fs.har.impl</name>
+ <value>org.apache.hadoop.hive.shims.HiveHarFileSystem</value>
+ <description>The implementation for accessing Hadoop Archives. Note that this won't be applicable to Hadoop versions less than 0.20</description>
+</property>
+
+<property>
+ <name>hive.archive.enabled</name>
+ <value>false</value>
+ <description>Whether archiving operations are permitted</description>
+</property>
+
+<property>
+ <name>hive.archive.har.parentdir.settable</name>
+ <value>false</value>
+ <description>In new Hadoop versions, the parent directory must be set while
+ creating a HAR. Because this functionality is hard to detect with just version
+ numbers, this conf var needs to be set manually.</description>
+</property>
+
</configuration>
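
fs.har.impl here hooks har:// URIs up to HiveHarFileSystem through Hadoop's standard fs.<scheme>.impl lookup, and hive.archive.enabled gates the new ALTER TABLE ... ARCHIVE PARTITION (...) statements added to the grammar below. A minimal sketch of the resolution, assuming Hadoop 0.20 and hypothetical host/path names:

    import java.net.URI;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;

    public class HarResolutionSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.har.impl", "org.apache.hadoop.hive.shims.HiveHarFileSystem");
        // Hadoop maps the scheme "har" to an implementation class through the
        // key "fs.har.impl", so this returns a HiveHarFileSystem instance.
        FileSystem fs = FileSystem.get(
            URI.create("har://hdfs-nn:8020/warehouse/t/ds=1/data.har"), conf);
        System.out.println(fs.getClass().getName());
      }
    }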
Modified: hadoop/hive/trunk/metastore/if/hive_metastore.thrift
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/metastore/if/hive_metastore.thrift?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/metastore/if/hive_metastore.thrift (original)
+++ hadoop/hive/trunk/metastore/if/hive_metastore.thrift Wed Jun 9 01:37:48 2010
@@ -246,6 +246,15 @@ service ThriftHiveMetastore extends fb30
throws(1: MetaException o1)
}
+// For storing info about archived partitions in parameters
+
+// Whether the partition is archived
+const string IS_ARCHIVED = "is_archived",
+// The original location of the partition, before archiving. After archiving,
+// this directory will contain the archive. When the partition
+// is dropped, this directory will be deleted
+const string ORIGINAL_LOCATION = "original_location",
+
// these should be needed only for backward compatibility with filestore
const string META_TABLE_COLUMNS = "columns",
const string META_TABLE_COLUMN_TYPES = "columns.types",
@@ -261,3 +270,5 @@ const string FILE_INPUT_FORMAT = "fil
const string FILE_OUTPUT_FORMAT = "file.outputformat",
const string META_TABLE_STORAGE = "storage_handler",
+
+
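
After a successful ARCHIVE, these two keys end up in the partition's parameters map alongside the rewritten storage-descriptor location. Hypothetical values for illustration:

    // is_archived        -> "true"
    // original_location  -> "hdfs://nn:8020/warehouse/t/ds=1"
    // sd.location        -> "har://hdfs-nn:8020/warehouse/t/ds=1/data.har"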
Modified: hadoop/hive/trunk/metastore/src/gen-cpp/hive_metastore_constants.cpp
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/metastore/src/gen-cpp/hive_metastore_constants.cpp?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/metastore/src/gen-cpp/hive_metastore_constants.cpp (original)
+++ hadoop/hive/trunk/metastore/src/gen-cpp/hive_metastore_constants.cpp Wed Jun 9 01:37:48 2010
@@ -12,6 +12,10 @@ const hive_metastoreConstants g_hive_met
hive_metastoreConstants::hive_metastoreConstants() {
DDL_TIME = "transient_lastDdlTime";
+ IS_ARCHIVED = "is_archived";
+
+ ORIGINAL_LOCATION = "original_location";
+
META_TABLE_COLUMNS = "columns";
META_TABLE_COLUMN_TYPES = "columns.types";
Modified: hadoop/hive/trunk/metastore/src/gen-cpp/hive_metastore_constants.h
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/metastore/src/gen-cpp/hive_metastore_constants.h?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/metastore/src/gen-cpp/hive_metastore_constants.h (original)
+++ hadoop/hive/trunk/metastore/src/gen-cpp/hive_metastore_constants.h Wed Jun 9 01:37:48 2010
@@ -15,6 +15,8 @@ class hive_metastoreConstants {
hive_metastoreConstants();
std::string DDL_TIME;
+ std::string IS_ARCHIVED;
+ std::string ORIGINAL_LOCATION;
std::string META_TABLE_COLUMNS;
std::string META_TABLE_COLUMN_TYPES;
std::string BUCKET_FIELD_NAME;
Modified: hadoop/hive/trunk/metastore/src/gen-javabean/org/apache/hadoop/hive/metastore/api/Constants.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/metastore/src/gen-javabean/org/apache/hadoop/hive/metastore/api/Constants.java?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/metastore/src/gen-javabean/org/apache/hadoop/hive/metastore/api/Constants.java (original)
+++ hadoop/hive/trunk/metastore/src/gen-javabean/org/apache/hadoop/hive/metastore/api/Constants.java Wed Jun 9 01:37:48 2010
@@ -18,6 +18,10 @@ public class Constants {
public static final String DDL_TIME = "transient_lastDdlTime";
+ public static final String IS_ARCHIVED = "is_archived";
+
+ public static final String ORIGINAL_LOCATION = "original_location";
+
public static final String META_TABLE_COLUMNS = "columns";
public static final String META_TABLE_COLUMN_TYPES = "columns.types";
Modified: hadoop/hive/trunk/metastore/src/gen-php/hive_metastore_constants.php
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/metastore/src/gen-php/hive_metastore_constants.php?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/metastore/src/gen-php/hive_metastore_constants.php (original)
+++ hadoop/hive/trunk/metastore/src/gen-php/hive_metastore_constants.php Wed Jun 9 01:37:48 2010
@@ -10,6 +10,10 @@ $GLOBALS['hive_metastore_CONSTANTS'] = a
$GLOBALS['hive_metastore_CONSTANTS']['DDL_TIME'] = "transient_lastDdlTime";
+$GLOBALS['hive_metastore_CONSTANTS']['IS_ARCHIVED'] = "is_archived";
+
+$GLOBALS['hive_metastore_CONSTANTS']['ORIGINAL_LOCATION'] = "original_location";
+
$GLOBALS['hive_metastore_CONSTANTS']['META_TABLE_COLUMNS'] = "columns";
$GLOBALS['hive_metastore_CONSTANTS']['META_TABLE_COLUMN_TYPES'] = "columns.types";
Modified: hadoop/hive/trunk/metastore/src/gen-py/hive_metastore/constants.py
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/metastore/src/gen-py/hive_metastore/constants.py?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/metastore/src/gen-py/hive_metastore/constants.py (original)
+++ hadoop/hive/trunk/metastore/src/gen-py/hive_metastore/constants.py Wed Jun 9 01:37:48 2010
@@ -9,6 +9,10 @@ from ttypes import *
DDL_TIME = "transient_lastDdlTime"
+IS_ARCHIVED = "is_archived"
+
+ORIGINAL_LOCATION = "original_location"
+
META_TABLE_COLUMNS = "columns"
META_TABLE_COLUMN_TYPES = "columns.types"
Modified: hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java (original)
+++ hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java Wed Jun 9 01:37:48 2010
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.metastore;
import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.util.List;
import org.apache.commons.lang.StringUtils;
@@ -148,9 +150,25 @@ public class HiveAlterHandler implements
List<Partition> parts = msdb.getPartitions(dbname, name, 0);
for (Partition part : parts) {
String oldPartLoc = part.getSd().getLocation();
- if (oldPartLoc.contains(oldTblLoc)) {
- part.getSd().setLocation(
- part.getSd().getLocation().replace(oldTblLoc, newTblLoc));
+ String oldTblLocPath = new Path(oldTblLoc).toUri().getPath();
+ String newTblLocPath = new Path(newTblLoc).toUri().getPath();
+ if (oldPartLoc.contains(oldTblLocPath)) {
+ URI newPartLocUri = null;
+ try {
+ URI oldPartLocUri = new URI(oldPartLoc);
+ newPartLocUri = new URI(
+ oldPartLocUri.getScheme(),
+ oldPartLocUri.getUserInfo(),
+ oldPartLocUri.getHost(),
+ oldPartLocUri.getPort(),
+ oldPartLocUri.getPath().replace(oldTblLocPath, newTblLocPath),
+ oldPartLocUri.getQuery(),
+ oldPartLocUri.getFragment());
+ } catch (URISyntaxException e) {
+ throw new InvalidOperationException("Old partition location " +
+ " is invalid. (" + oldPartLoc + ")");
+ }
+ part.getSd().setLocation(newPartLocUri.toString());
msdb.alterPartition(dbname, name, part);
}
}
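
Rebuilding the partition URI component by component, rather than doing a plain String.replace over the whole location, substitutes only the path portion and leaves the partition's own scheme and authority intact. A hypothetical rename of table t1 to t2:

    // old partition location: hdfs://nn:8020/warehouse/t1/ds=1
    // new partition location: hdfs://nn:8020/warehouse/t2/ds=1
    // Scheme, host, and port are carried over unchanged, which matters when
    // the partition's authority differs from the table's (e.g. a har: URI).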
Modified: hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java (original)
+++ hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java Wed Jun 9 01:37:48 2010
@@ -1075,13 +1075,23 @@ public class HiveMetaStore extends Thrif
boolean success = false;
Path partPath = null;
Table tbl = null;
+ Partition part = null;
+ boolean isArchived = false;
+ Path archiveParentDir = null;
+
try {
ms.openTransaction();
- Partition part = get_partition(db_name, tbl_name, part_vals);
+ part = get_partition(db_name, tbl_name, part_vals);
+
if (part == null) {
throw new NoSuchObjectException("Partition doesn't exist. "
+ part_vals);
}
+
+ isArchived = MetaStoreUtils.isArchived(part);
+ if (isArchived) {
+ archiveParentDir = MetaStoreUtils.getOriginalLocation(part);
+ }
if (part.getSd() == null || part.getSd().getLocation() == null) {
throw new MetaException("Partition metadata is corrupted");
}
@@ -1094,9 +1104,17 @@ public class HiveMetaStore extends Thrif
} finally {
if (!success) {
ms.rollbackTransaction();
- } else if (deleteData && (partPath != null)) {
+ } else if (deleteData && ((partPath != null) || (archiveParentDir != null))) {
if (tbl != null && !isExternal(tbl)) {
- wh.deleteDir(partPath, true);
+ // Archived partitions have har:/to_har_file as their location.
+ // The original directory was saved in params
+ if (isArchived) {
+ assert(archiveParentDir != null);
+ wh.deleteDir(archiveParentDir, true);
+ } else {
+ assert(partPath != null);
+ wh.deleteDir(partPath, true);
+ }
// ok even if the data is not deleted
}
}
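
Condensed, the new delete decision amounts to the following sketch (variable names as in the diff above; illustrative, not the literal committed code):

    // An archived partition's sd.location is a har: URI that cannot be
    // deleted directly; remove the saved original directory instead.
    Path toDelete = isArchived
        ? archiveParentDir   // directory now holding the HAR
        : partPath;          // ordinary partition directory
    wh.deleteDir(toDelete, true);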
Modified: hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java (original)
+++ hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java Wed Jun 9 01:37:48 2010
@@ -834,6 +834,26 @@ public class MetaStoreUtils {
return "TRUE".equalsIgnoreCase(params.get("EXTERNAL"));
}
+ public static boolean isArchived(
+ org.apache.hadoop.hive.metastore.api.Partition part) {
+ Map<String, String> params = part.getParameters();
+ if ("true".equalsIgnoreCase(params.get(Constants.IS_ARCHIVED))) {
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ public static Path getOriginalLocation(
+ org.apache.hadoop.hive.metastore.api.Partition part) {
+ Map<String, String> params = part.getParameters();
+ assert(isArchived(part));
+ String originalLocation = params.get(Constants.ORIGINAL_LOCATION);
+ assert( originalLocation != null);
+
+ return new Path(originalLocation);
+ }
+
public static boolean isNonNativeTable(Table table) {
if (table == null) {
return false;
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java Wed Jun 9 01:37:48 2010
@@ -28,6 +28,8 @@ import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Serializable;
import java.io.Writer;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
@@ -43,8 +45,10 @@ import org.apache.commons.logging.LogFac
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FsShell;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.Warehouse;
@@ -66,6 +70,7 @@ import org.apache.hadoop.hive.ql.metadat
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.plan.AddPartitionDesc;
import org.apache.hadoop.hive.ql.plan.AlterTableDesc;
+import org.apache.hadoop.hive.ql.plan.AlterTableSimpleDesc;
import org.apache.hadoop.hive.ql.plan.CreateTableDesc;
import org.apache.hadoop.hive.ql.plan.CreateTableLikeDesc;
import org.apache.hadoop.hive.ql.plan.CreateViewDesc;
@@ -78,7 +83,7 @@ import org.apache.hadoop.hive.ql.plan.Sh
import org.apache.hadoop.hive.ql.plan.ShowPartitionsDesc;
import org.apache.hadoop.hive.ql.plan.ShowTableStatusDesc;
import org.apache.hadoop.hive.ql.plan.ShowTablesDesc;
-import org.apache.hadoop.hive.ql.plan.TouchDesc;
+import org.apache.hadoop.hive.ql.plan.AlterTableDesc.AlterTableTypes;
import org.apache.hadoop.hive.ql.plan.api.StageType;
import org.apache.hadoop.hive.serde.Constants;
import org.apache.hadoop.hive.serde2.Deserializer;
@@ -88,8 +93,9 @@ import org.apache.hadoop.hive.serde2.Ser
import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe;
import org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
+import org.apache.hadoop.hive.shims.HadoopShims;
import org.apache.hadoop.hive.shims.ShimLoader;
-
+import org.apache.hadoop.util.ToolRunner;
/**
* DDLTask implementation.
*
@@ -102,6 +108,12 @@ public class DDLTask extends Task<DDLWor
private static final int separator = Utilities.tabCode;
private static final int terminator = Utilities.newLineCode;
+ // These are suffixes attached to intermediate directory names used in the
+ // archiving / un-archiving process.
+ private static String INTERMEDIATE_ARCHIVED_DIR_SUFFIX;
+ private static String INTERMEDIATE_ORIGINAL_DIR_SUFFIX;
+ private static String INTERMEDIATE_EXTRACTED_DIR_SUFFIX;
+
public DDLTask() {
super();
}
@@ -110,6 +122,13 @@ public class DDLTask extends Task<DDLWor
public void initialize(HiveConf conf, QueryPlan queryPlan, DriverContext ctx) {
super.initialize(conf, queryPlan, ctx);
this.conf = conf;
+
+ INTERMEDIATE_ARCHIVED_DIR_SUFFIX =
+ HiveConf.getVar(conf, ConfVars.METASTORE_INT_ARCHIVED);
+ INTERMEDIATE_ORIGINAL_DIR_SUFFIX =
+ HiveConf.getVar(conf, ConfVars.METASTORE_INT_ORIGINAL);
+ INTERMEDIATE_EXTRACTED_DIR_SUFFIX =
+ HiveConf.getVar(conf, ConfVars.METASTORE_INT_EXTRACTED);
}
@Override
@@ -150,12 +169,17 @@ public class DDLTask extends Task<DDLWor
return addPartition(db, addPartitionDesc);
}
- TouchDesc touchDesc = work.getTouchDesc();
- if (touchDesc != null) {
- return touch(db, touchDesc);
- }
-
+ AlterTableSimpleDesc simpleDesc = work.getAlterTblSimpleDesc();
+ if(simpleDesc != null) {
+ if (simpleDesc.getType() == AlterTableTypes.TOUCH) {
+ return touch(db, simpleDesc);
+ } else if (simpleDesc.getType() == AlterTableTypes.ARCHIVE) {
+ return archive(db, simpleDesc, driverContext);
+ } else if (simpleDesc.getType() == AlterTableTypes.UNARCHIVE) {
+ return unarchive(db, simpleDesc);
+ }
+ }
MsckDesc msckDesc = work.getMsckDesc();
if (msckDesc != null) {
return msck(db, msckDesc);
@@ -257,7 +281,7 @@ public class DDLTask extends Task<DDLWor
* @return
* @throws HiveException
*/
- private int touch(Hive db, TouchDesc touchDesc)
+ private int touch(Hive db, AlterTableSimpleDesc touchDesc)
throws HiveException {
String dbName = touchDesc.getDbName();
@@ -290,6 +314,499 @@ public class DDLTask extends Task<DDLWor
}
return 0;
}
+ /**
+ * Determines whether a partition has been archived
+ *
+ * @param p
+ * @return
+ */
+
+ private boolean isArchived(Partition p) {
+ Map<String, String> params = p.getParameters();
+ if ("true".equalsIgnoreCase(params.get(
+ org.apache.hadoop.hive.metastore.api.Constants.IS_ARCHIVED))) {
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ private void setIsArchived(Partition p, boolean state) {
+ Map<String, String> params = p.getParameters();
+ if (state) {
+ params.put(org.apache.hadoop.hive.metastore.api.Constants.IS_ARCHIVED,
+ "true");
+ } else {
+ params.remove(org.apache.hadoop.hive.metastore.api.Constants.IS_ARCHIVED);
+ }
+ }
+
+ private String getOriginalLocation(Partition p) {
+ Map<String, String> params = p.getParameters();
+ return params.get(
+ org.apache.hadoop.hive.metastore.api.Constants.ORIGINAL_LOCATION);
+ }
+
+ private void setOriginalLocation(Partition p, String loc) {
+ Map<String, String> params = p.getParameters();
+ if (loc == null) {
+ params.remove(org.apache.hadoop.hive.metastore.api.Constants.ORIGINAL_LOCATION);
+ } else {
+ params.put(org.apache.hadoop.hive.metastore.api.Constants.ORIGINAL_LOCATION, loc);
+ }
+ }
+
+ // Returns only the path component of the URI
+ private String getArchiveDirOnly(Path parentDir, String archiveName) {
+ URI parentUri = parentDir.toUri();
+ Path harDir = new Path(parentUri.getPath(), archiveName);
+ return harDir.toString();
+ }
+
+ /**
+ * Sets the appropriate attributes in the supplied Partition object to mark
+ * it as archived. Note that the metastore is not touched - a separate
+ * call to alter_partition is needed.
+ *
+ * @param p - the partition object to modify
+ * @param parentDir - the parent directory of the archive, which is the
+ * original directory that the partition's files resided in
+ * @param dirInArchive - the directory within the archive file that contains
+ * the partition's files
+ * @param archiveName - the name of the archive
+ * @throws URISyntaxException
+ */
+ private void setArchived(Partition p, Path parentDir, String dirInArchive, String archiveName)
+ throws URISyntaxException {
+ assert(isArchived(p) == false);
+ Map<String, String> params = p.getParameters();
+
+ URI parentUri = parentDir.toUri();
+ String parentHost = parentUri.getHost();
+ String harHost = null;
+ if (parentHost == null) {
+ harHost = "";
+ } else {
+ harHost = parentUri.getScheme() + "-" + parentHost;
+ }
+
+ // harUri is used to access the partition's files, which are in the archive
+ // The format of the URI is something like:
+ // har://underlyingfsscheme-host:port/archivepath
+ URI harUri = null;
+ if (dirInArchive.length() == 0) {
+ harUri = new URI("har", parentUri.getUserInfo(), harHost, parentUri.getPort(),
+ getArchiveDirOnly(parentDir, archiveName),
+ parentUri.getQuery(), parentUri.getFragment());
+ } else {
+ harUri = new URI("har", parentUri.getUserInfo(), harHost, parentUri.getPort(),
+ new Path(getArchiveDirOnly(parentDir, archiveName), dirInArchive).toUri().getPath(),
+ parentUri.getQuery(), parentUri.getFragment());
+ }
+ setIsArchived(p, true);
+ setOriginalLocation(p, parentDir.toString());
+ p.setLocation(harUri.toString());
+ }
+
+ /**
+ * Sets the appropriate attributes in the supplied Partition object to mark
+ * it as not archived. Note that the metastore is not touched - a separate
+ * call to alter_partition is needed.
+ *
+ * @param p - the partition to modify
+ */
+ private void setUnArchived(Partition p) {
+ assert(isArchived(p) == true);
+ String parentDir = getOriginalLocation(p);
+ setIsArchived(p, false);
+ setOriginalLocation(p, null);
+ assert(parentDir != null);
+ p.setLocation(parentDir);
+ }
+
+ private boolean pathExists(FileSystem fs, Path p) throws HiveException {
+ try {
+ return fs.exists(p);
+ } catch (IOException e) {
+ throw new HiveException(e);
+ }
+ }
+
+ private void moveDir(FileSystem fs, Path from, Path to) throws HiveException {
+ try {
+ if (!fs.rename(from, to)) {
+ throw new HiveException("Moving " + from + " to " + to + " failed!");
+ }
+ } catch (IOException e) {
+ throw new HiveException(e);
+ }
+ }
+
+ private void deleteDir(Path dir) throws HiveException {
+ try {
+ Warehouse wh = new Warehouse(conf);
+ wh.deleteDir(dir, true);
+ } catch (MetaException e) {
+ throw new HiveException(e);
+ }
+ }
+
+ private int archive(Hive db, AlterTableSimpleDesc simpleDesc, DriverContext driverContext)
+ throws HiveException {
+ String dbName = simpleDesc.getDbName();
+ String tblName = simpleDesc.getTableName();
+
+ Table tbl = db.getTable(dbName, tblName);
+ validateAlterTableType(tbl, AlterTableDesc.AlterTableTypes.ARCHIVE);
+
+ Map<String, String> partSpec = simpleDesc.getPartSpec();
+ Partition p = db.getPartition(tbl, partSpec, false);
+
+ if (tbl.getTableType() != TableType.MANAGED_TABLE) {
+ throw new HiveException("ARCHIVE can only be performed on managed tables");
+ }
+
+ if (p == null) {
+ throw new HiveException("Specified partition does not exist");
+ }
+
+ if (isArchived(p)) {
+ // If there were a failure right after the metadata was updated in an
+ // archiving operation, it's possible that the original, unarchived files
+ // weren't deleted.
+ Path originalDir = new Path(getOriginalLocation(p));
+ Path leftOverIntermediateOriginal = new Path(originalDir.getParent(),
+ originalDir.getName() + INTERMEDIATE_ORIGINAL_DIR_SUFFIX);
+ try {
+ if (pathExists(leftOverIntermediateOriginal.getFileSystem(conf),
+ leftOverIntermediateOriginal)) {
+ console.printInfo("Deleting " + leftOverIntermediateOriginal +
+ " left over from a previous archiving operation");
+ deleteDir(leftOverIntermediateOriginal);
+ }
+ } catch (IOException e) {
+ throw new HiveException(e);
+ }
+ throw new HiveException("Specified partition is already archived");
+ }
+
+ Path originalDir = p.getPartitionPath();
+ Path intermediateArchivedDir = new Path(originalDir.getParent(),
+ originalDir.getName() + INTERMEDIATE_ARCHIVED_DIR_SUFFIX);
+ Path intermediateOriginalDir = new Path(originalDir.getParent(),
+ originalDir.getName() + INTERMEDIATE_ORIGINAL_DIR_SUFFIX);
+ String archiveName = "data.har";
+ FileSystem fs = null;
+ try {
+ fs = originalDir.getFileSystem(conf);
+ } catch (IOException e) {
+ throw new HiveException(e);
+ }
+
+ // The following steps seem roundabout, but they are meant to aid in
+ // recovery if a failure occurs and to keep a consistent state in the FS
+
+ // Steps:
+ // 1. Create the archive in a temporary folder
+ // 2. Move the archive dir to an intermediate dir that is at the same
+ // level as the original partition dir. Call the new dir
+ // intermediate-archive.
+ // 3. Rename the original partition dir to an intermediate dir. Call the
+ // renamed dir intermediate-original
+ // 4. Rename intermediate-archive to the original partition dir
+ // 5. Change the metadata
+ // 6. Delete the original partition files in intermediate-original
+
+ // The original partition files are deleted after the metadata change
+ // because the presence of those files is used to indicate whether
+ // the original partition directory contains archived or unarchived files.
+
+ // Create an archived version of the partition in a directory ending in
+ // ARCHIVE_INTERMEDIATE_DIR_SUFFIX that's at the same level as the partition,
+ // if it does not already exist. If it does exist, we assume the dir is good
+ // to use as the move operation that created it is atomic.
+ if (!pathExists(fs, intermediateArchivedDir) &&
+ !pathExists(fs, intermediateOriginalDir)) {
+
+ // First create the archive in a tmp dir so that if the job fails, the
+ // bad files don't pollute the filesystem
+ Path tmpDir = new Path(driverContext.getCtx().getMRScratchDir(), "partlevel");
+
+ console.printInfo("Creating " + archiveName + " for " + originalDir.toString());
+ console.printInfo("in " + tmpDir);
+ console.printInfo("Please wait... (this may take a while)");
+
+ // Create the Hadoop archive
+ HadoopShims shim = ShimLoader.getHadoopShims();
+ int ret=0;
+ try {
+ ret = shim.createHadoopArchive(conf, originalDir, tmpDir, archiveName);
+ } catch (Exception e) {
+ throw new HiveException(e);
+ }
+ if (ret != 0) {
+ throw new HiveException("Error while creating HAR");
+ }
+ // Move from the tmp dir to an intermediate directory, in the same level as
+ // the partition directory. e.g. .../hr=12-intermediate-archived
+ try {
+ console.printInfo("Moving " + tmpDir + " to " + intermediateArchivedDir);
+ if (pathExists(fs, intermediateArchivedDir)) {
+ throw new HiveException("The intermediate archive directory already exists.");
+ }
+ fs.rename(tmpDir, intermediateArchivedDir);
+ } catch (IOException e) {
+ throw new HiveException("Error while moving tmp directory");
+ }
+ } else {
+ if (pathExists(fs, intermediateArchivedDir)) {
+ console.printInfo("Intermediate archive directory " + intermediateArchivedDir +
+ " already exists. Assuming it contains an archived version of the partition");
+ }
+ }
+
+ // If we get to here, we know that we've archived the partition files, but
+ // they may be in the original partition location, or in the intermediate
+ // original dir.
+
+ // Move the original parent directory to the intermediate original directory
+ // if the move hasn't been made already
+ if (!pathExists(fs, intermediateOriginalDir)) {
+ console.printInfo("Moving " + originalDir + " to " +
+ intermediateOriginalDir);
+ moveDir(fs, originalDir, intermediateOriginalDir);
+ } else {
+ console.printInfo(intermediateOriginalDir + " already exists. " +
+ "Assuming it contains the original files in the partition");
+ }
+
+ // If there's a failure from here to when the metadata is updated,
+ // there will be no data in the partition, or an error while trying to read
+ // the partition (if the archive files have been moved to the original
+ // partition directory.) But re-running the archive command will allow
+ // recovery
+
+ // Move the intermediate archived directory to the original parent directory
+ if (!pathExists(fs, originalDir)) {
+ console.printInfo("Moving " + intermediateArchivedDir + " to " +
+ originalDir);
+ moveDir(fs, intermediateArchivedDir, originalDir);
+ } else {
+ console.printInfo(originalDir + " already exists. " +
+ "Assuming it contains the archived version of the partition");
+ }
+
+ // Record this change in the metastore
+ try {
+ boolean parentSettable =
+ conf.getBoolVar(HiveConf.ConfVars.HIVEHARPARENTDIRSETTABLE);
+
+ // dirInArchive is the directory within the archive that has all the files
+ // for this partition. With older versions of Hadoop, archiving a
+ // a directory would produce the same directory structure
+ // in the archive. So if you created myArchive.har of /tmp/myDir, the
+ // files in /tmp/myDir would be located under myArchive.har/tmp/myDir/*
+ // In this case, dirInArchive should be tmp/myDir
+
+ // With newer versions of Hadoop, the parent directory could be specified.
+ // Assuming the parent directory was set to /tmp/myDir when creating the
+ // archive, the files can be found under myArchive.har/*
+ // In this case, dirInArchive should be empty
+
+ String dirInArchive = "";
+ if (!parentSettable) {
+ dirInArchive = originalDir.toUri().getPath();
+ if(dirInArchive.length() > 1 && dirInArchive.charAt(0)=='/') {
+ dirInArchive = dirInArchive.substring(1);
+ }
+ }
+ setArchived(p, originalDir, dirInArchive, archiveName);
+ db.alterPartition(tblName, p);
+ } catch (Exception e) {
+ throw new HiveException("Unable to change the partition info for HAR", e);
+ }
+
+ // If a failure occurs here, the directory containing the original files
+ // will not be deleted. The user will run ARCHIVE again to clear this up
+ deleteDir(intermediateOriginalDir);
+
+
+ return 0;
+ }
+
+ private int unarchive(Hive db, AlterTableSimpleDesc simpleDesc)
+ throws HiveException {
+ String dbName = simpleDesc.getDbName();
+ String tblName = simpleDesc.getTableName();
+
+ Table tbl = db.getTable(dbName, tblName);
+ validateAlterTableType(tbl, AlterTableDesc.AlterTableTypes.UNARCHIVE);
+
+ // Means user specified a table, not a partition
+ if (simpleDesc.getPartSpec() == null) {
+ throw new HiveException("ARCHIVE is for partitions only");
+ }
+
+ Map<String, String> partSpec = simpleDesc.getPartSpec();
+ Partition p = db.getPartition(tbl, partSpec, false);
+
+ if (tbl.getTableType() != TableType.MANAGED_TABLE) {
+ throw new HiveException("UNARCHIVE can only be performed on managed tables");
+ }
+
+ if (p == null) {
+ throw new HiveException("Specified partition does not exist");
+ }
+
+ if (!isArchived(p)) {
+ Path location = new Path(p.getLocation());
+ Path leftOverArchiveDir = new Path(location.getParent(),
+ location.getName() + INTERMEDIATE_ARCHIVED_DIR_SUFFIX);
+
+ try {
+ if (pathExists(location.getFileSystem(conf), leftOverArchiveDir)) {
+ console.printInfo("Deleting " + leftOverArchiveDir + " left over " +
+ "from a previous unarchiving operation");
+ deleteDir(leftOverArchiveDir);
+ }
+ } catch (IOException e) {
+ throw new HiveException(e);
+ }
+ throw new HiveException("Specified partition is not archived");
+ }
+
+ Path originalLocation = new Path(getOriginalLocation(p));
+ Path sourceDir = new Path(p.getLocation());
+ Path intermediateArchiveDir = new Path(originalLocation.getParent(),
+ originalLocation.getName() + INTERMEDIATE_ARCHIVED_DIR_SUFFIX);
+ Path intermediateExtractedDir = new Path(originalLocation.getParent(),
+ originalLocation.getName() + INTERMEDIATE_EXTRACTED_DIR_SUFFIX);
+
+ Path tmpDir = new Path(driverContext.getCtx().getMRScratchDir());
+
+ FileSystem fs = null;
+ try {
+ fs = tmpDir.getFileSystem(conf);
+ // Verify that there are no files in the tmp dir, because if there are,
+ // they would be copied into the partition
+ FileStatus [] filesInTmpDir = fs.listStatus(tmpDir);
+ if (filesInTmpDir.length != 0) {
+ for (FileStatus file : filesInTmpDir) {
+ console.printInfo(file.getPath().toString());
+ }
+ throw new HiveException("Temporary directory " + tmpDir + " is not empty");
+ }
+
+ } catch (IOException e) {
+ throw new HiveException(e);
+ }
+
+ // Some sanity checks
+ if (originalLocation == null) {
+ throw new HiveException("Missing archive data in the partition");
+ }
+ if (!"har".equals(sourceDir.toUri().getScheme())) {
+ throw new HiveException("Location should refer to a HAR");
+ }
+
+ // Clarification of terms:
+ // - The originalLocation directory represents the original directory of the
+ // partition's files. They now contain an archived version of those files
+ // eg. hdfs:/warehouse/myTable/ds=1/
+ // - The source directory is the directory containing all the files that
+ // should be in the partition. e.g. har:/warehouse/myTable/ds=1/myTable.har/
+ // Note the har:/ scheme
+
+ // Steps:
+ // 1. Extract the archive in a temporary folder
+ // 2. Move the extracted dir to an intermediate dir that is at the same
+ // level as originalLocation. Call the new dir intermediate-extracted.
+ // 3. Rename the original partition dir to an intermediate dir. Call the
+ // renamed dir intermediate-archive
+ // 4. Rename intermediate-extracted to the original partition dir
+ // 5. Change the metadata
+ // 6. Delete the archived partition files in intermediate-archive
+
+ if (!pathExists(fs, intermediateExtractedDir) &&
+ !pathExists(fs, intermediateArchiveDir)) {
+ try {
+
+ // Copy the files out of the archive into the temporary directory
+ String copySource = (new Path(sourceDir, "*")).toString();
+ String copyDest = tmpDir.toString();
+ List<String> args = new ArrayList<String>();
+ args.add("-cp");
+ args.add(copySource);
+ args.add(copyDest);
+
+ console.printInfo("Copying " + copySource + " to " + copyDest);
+ FsShell fss = new FsShell(conf);
+ int ret = 0;
+ try {
+ ret = ToolRunner.run(fss, args.toArray(new String[0]));
+ } catch (Exception e) {
+ throw new HiveException(e);
+ }
+ if (ret != 0) {
+ throw new HiveException("Error while copying files from archive");
+ }
+
+ console.printInfo("Moving " + tmpDir + " to " + intermediateExtractedDir);
+ if (fs.exists(intermediateExtractedDir)) {
+ throw new HiveException("Invalid state: the intermediate extracted " +
+ "directory already exists.");
+ }
+ fs.rename(tmpDir, intermediateExtractedDir);
+ } catch (Exception e) {
+ throw new HiveException(e);
+ }
+ }
+
+ // At this point, we know that the extracted files are in the intermediate
+ // extracted dir, or in the original directory.
+
+ if (!pathExists(fs, intermediateArchiveDir)) {
+ try {
+ console.printInfo("Moving " + originalLocation + " to " + intermediateArchiveDir);
+ fs.rename(originalLocation, intermediateArchiveDir);
+ } catch (IOException e) {
+ throw new HiveException(e);
+ }
+ } else {
+ console.printInfo(intermediateArchiveDir + " already exists. " +
+ "Assuming it contains the archived version of the partition");
+ }
+
+ // If there is a failure from here until the metadata is changed,
+ // the partition will be empty or throw errors on read.
+
+ // If the original location exists here, then it must be the extracted files
+ // because in the previous step, we moved the previous original location
+ // (containing the archived version of the files) to intermediateArchiveDir
+ if (!pathExists(fs, originalLocation)) {
+ try {
+ console.printInfo("Moving " + intermediateExtractedDir + " to " + originalLocation);
+ fs.rename(intermediateExtractedDir, originalLocation);
+ } catch (IOException e) {
+ throw new HiveException(e);
+ }
+ } else {
+ console.printInfo(originalLocation + " already exists. " +
+ "Assuming it contains the extracted files in the partition");
+ }
+
+ setUnArchived(p);
+ try {
+ db.alterPartition(tblName, p);
+ } catch (InvalidOperationException e) {
+ throw new HiveException(e);
+ }
+ // If a failure happens here, the intermediate archive files won't be
+ // deleted. The user will need to call unarchive again to clear those up.
+ deleteDir(intermediateArchiveDir);
+
+ return 0;
+ }
private void validateAlterTableType(
Table tbl, AlterTableDesc.AlterTableTypes alterType) throws HiveException {
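
The har URI assembled in setArchived() encodes the underlying filesystem in the authority as "<scheme>-<host>", and dirInArchive compensates for pre-parent-settable Hadoop versions that reproduce the full source path inside the archive. A minimal standalone sketch with hypothetical locations:

    import java.net.URI;
    import org.apache.hadoop.fs.Path;

    public class HarUriSketch {
      public static void main(String[] args) throws Exception {
        URI parent = new Path("hdfs://nn:8020/warehouse/t/ds=1").toUri();
        String harHost = parent.getScheme() + "-" + parent.getHost(); // hdfs-nn
        // Parent-settable Hadoop: dirInArchive is empty.
        URI harUri = new URI("har", parent.getUserInfo(), harHost,
            parent.getPort(), new Path(parent.getPath(), "data.har").toString(),
            null, null);
        // Prints har://hdfs-nn:8020/warehouse/t/ds=1/data.har
        System.out.println(harUri);
        // Older Hadoop mirrors the source path inside the archive, so with
        // dirInArchive = "warehouse/t/ds=1" the location would instead be
        // har://hdfs-nn:8020/warehouse/t/ds=1/data.har/warehouse/t/ds=1
      }
    }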
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java Wed Jun 9 01:37:48 2010
@@ -51,7 +51,7 @@ import org.apache.thrift.transport.TMemo
/**
* A Hive Table Partition: is a fundamental storage unit within a Table.
- *
+ *
* Please note that the ql code should always go through methods of this class to access the
* metadata, instead of directly accessing org.apache.hadoop.hive.metastore.api.Partition.
* This helps to isolate the metastore code and the ql code.
@@ -72,7 +72,7 @@ public class Partition implements Serial
private Class<? extends HiveOutputFormat> outputFormatClass;
private Class<? extends InputFormat> inputFormatClass;
private URI uri;
-
+
/**
* @return The values of the partition
* @see org.apache.hadoop.hive.metastore.api.Partition#getValues()
@@ -82,17 +82,17 @@ public class Partition implements Serial
}
/**
- * Used only for serialization.
+ * Used only for serialization.
*/
public Partition() {
}
-
+
/**
* create an empty partition.
* SemanticAnalyzer code requires an empty partition when the table is not partitioned.
*/
public Partition(Table tbl) throws HiveException {
- org.apache.hadoop.hive.metastore.api.Partition tPart =
+ org.apache.hadoop.hive.metastore.api.Partition tPart =
new org.apache.hadoop.hive.metastore.api.Partition();
tPart.setSd(tbl.getTTable().getSd()); // TODO: get a copy
initialize(tbl, tPart);
@@ -105,7 +105,7 @@ public class Partition implements Serial
/**
* Create partition object with the given info.
- *
+ *
* @param tbl
* Table the partition will be in.
* @param partSpec
@@ -158,7 +158,7 @@ public class Partition implements Serial
/**
* Initializes this object with the given variables
- *
+ *
* @param table
* Table the partition belongs to
* @param tPartition
@@ -265,7 +265,7 @@ public class Partition implements Serial
clsName = tPartition.getSd().getInputFormat();
}
if (clsName == null) {
- clsName = org.apache.hadoop.mapred.SequenceFileInputFormat.class.getName();
+ clsName = org.apache.hadoop.mapred.SequenceFileInputFormat.class.getName();
}
try {
inputFormatClass = ((Class<? extends InputFormat>) Class.forName(clsName, true,
@@ -285,10 +285,10 @@ public class Partition implements Serial
clsName = tPartition.getSd().getOutputFormat();
}
if (clsName == null) {
- clsName = HiveSequenceFileOutputFormat.class.getName();
+ clsName = HiveSequenceFileOutputFormat.class.getName();
}
try {
- Class<?> c = (Class<? extends HiveOutputFormat>)(Class.forName(clsName, true,
+ Class<?> c = (Class.forName(clsName, true,
JavaUtils.getClassLoader()));
// Replace FileOutputFormat for backward compatibility
if (!HiveOutputFormat.class.isAssignableFrom(c)) {
@@ -312,7 +312,7 @@ public class Partition implements Serial
/*
* TODO: Keeping this code around for later use when we will support
* sampling on tables which are not created with CLUSTERED INTO clause
- *
+ *
* // read from table meta data int numBuckets = this.table.getNumBuckets();
* if (numBuckets == -1) { // table meta data does not have bucket
* information // check if file system has multiple buckets(files) in this
@@ -344,7 +344,9 @@ public class Partition implements Serial
@SuppressWarnings("nls")
public Path getBucketPath(int bucketNum) {
try {
- FileSystem fs = FileSystem.get(table.getDataLocation(), Hive.get()
+ // Previously, this got the filesystem of the Table, which could be
+ // different from the filesystem of the partition.
+ FileSystem fs = FileSystem.get(getPartitionPath().toUri(), Hive.get()
.getConf());
String pathPattern = getPartitionPath().toString();
if (getBucketCount() > 0) {
@@ -445,7 +447,7 @@ public class Partition implements Serial
public org.apache.hadoop.hive.metastore.api.Partition getTPartition() {
return tPartition;
}
-
+
/**
* Should be only used by serialization.
*/
@@ -462,5 +464,11 @@ public class Partition implements Serial
return tPartition.getSd().getCols();
}
-
+ public String getLocation() {
+ return tPartition.getSd().getLocation();
+ }
+
+ public void setLocation(String location) {
+ tPartition.getSd().setLocation(location);
+ }
}
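
The getBucketPath() change above matters precisely for archived partitions, whose filesystem can differ from the table's. Hypothetical locations for illustration:

    // table location:     hdfs://nn:8020/warehouse/t
    // archived partition: har://hdfs-nn:8020/warehouse/t/ds=1/data.har
    // Resolving against getPartitionPath() selects the HiveHarFileSystem
    // registered via fs.har.impl; the table's HDFS filesystem could not
    // list bucket files inside the HAR.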
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java Wed Jun 9 01:37:48 2010
@@ -20,11 +20,14 @@ package org.apache.hadoop.hive.ql.parse;
import java.util.ArrayList;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
+import java.util.Set;
+import java.util.Map.Entry;
import org.antlr.runtime.tree.CommonTree;
import org.antlr.runtime.tree.Tree;
@@ -32,6 +35,7 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Order;
@@ -40,6 +44,7 @@ import org.apache.hadoop.hive.ql.exec.Ta
import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat;
import org.apache.hadoop.hive.ql.plan.AddPartitionDesc;
import org.apache.hadoop.hive.ql.plan.AlterTableDesc;
+import org.apache.hadoop.hive.ql.plan.AlterTableSimpleDesc;
import org.apache.hadoop.hive.ql.plan.DDLWork;
import org.apache.hadoop.hive.ql.plan.DescFunctionDesc;
import org.apache.hadoop.hive.ql.plan.DescTableDesc;
@@ -51,7 +56,6 @@ import org.apache.hadoop.hive.ql.plan.Sh
import org.apache.hadoop.hive.ql.plan.ShowTableStatusDesc;
import org.apache.hadoop.hive.ql.plan.ShowTablesDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
-import org.apache.hadoop.hive.ql.plan.TouchDesc;
import org.apache.hadoop.hive.ql.plan.AlterTableDesc.AlterTableTypes;
import org.apache.hadoop.hive.serde.Constants;
import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
@@ -64,6 +68,8 @@ import org.apache.hadoop.mapred.TextInpu
public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer {
private static final Log LOG = LogFactory.getLog("hive.ql.parse.DDLSemanticAnalyzer");
public static final Map<Integer, String> TokenToTypeName = new HashMap<Integer, String>();
+
+ public static final Set<String> reservedPartitionValues = new HashSet<String>();
static {
TokenToTypeName.put(HiveParser.TOK_BOOLEAN, Constants.BOOLEAN_TYPE_NAME);
TokenToTypeName.put(HiveParser.TOK_TINYINT, Constants.TINYINT_TYPE_NAME);
@@ -89,6 +95,12 @@ public class DDLSemanticAnalyzer extends
public DDLSemanticAnalyzer(HiveConf conf) throws SemanticException {
super(conf);
+ // Partition can't have this name
+ reservedPartitionValues.add(HiveConf.getVar(conf, ConfVars.DEFAULTPARTITIONNAME));
+ // Partition value can't end in this suffix
+ reservedPartitionValues.add(HiveConf.getVar(conf, ConfVars.METASTORE_INT_ORIGINAL));
+ reservedPartitionValues.add(HiveConf.getVar(conf, ConfVars.METASTORE_INT_ARCHIVED));
+ reservedPartitionValues.add(HiveConf.getVar(conf, ConfVars.METASTORE_INT_EXTRACTED));
}
@Override
@@ -121,6 +133,10 @@ public class DDLSemanticAnalyzer extends
analyzeAlterTableRename(ast);
} else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_TOUCH) {
analyzeAlterTableTouch(ast);
+ } else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_ARCHIVE) {
+ analyzeAlterTableArchive(ast, false);
+ } else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_UNARCHIVE) {
+ analyzeAlterTableArchive(ast, true);
} else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_ADDCOLS) {
analyzeAlterTableModifyCols(ast, AlterTableTypes.ADDCOLS);
} else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_REPLACECOLS) {
@@ -551,6 +567,7 @@ public class DDLSemanticAnalyzer extends
break;
case HiveParser.TOK_PARTSPEC:
if (currentPart != null) {
+ validatePartitionValues(currentPart);
AddPartitionDesc addPartitionDesc = new AddPartitionDesc(
MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, currentPart,
currentLocation, ifNotExists);
@@ -572,6 +589,7 @@ public class DDLSemanticAnalyzer extends
// add the last one
if (currentPart != null) {
+ validatePartitionValues(currentPart);
AddPartitionDesc addPartitionDesc = new AddPartitionDesc(
MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, currentPart,
currentLocation, ifNotExists);
@@ -599,20 +617,50 @@ public class DDLSemanticAnalyzer extends
List<Map<String, String>> partSpecs = getPartitionSpecs(ast);
if (partSpecs.size() == 0) {
- TouchDesc touchDesc = new TouchDesc(
- MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, null);
+ AlterTableSimpleDesc touchDesc = new AlterTableSimpleDesc(
+ MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, null,
+ AlterTableDesc.AlterTableTypes.TOUCH);
rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(),
touchDesc), conf));
} else {
for (Map<String, String> partSpec : partSpecs) {
- TouchDesc touchDesc = new TouchDesc(
- MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, partSpec);
+ AlterTableSimpleDesc touchDesc = new AlterTableSimpleDesc(
+ MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, partSpec,
+ AlterTableDesc.AlterTableTypes.TOUCH);
rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(),
touchDesc), conf));
}
}
}
+ private void analyzeAlterTableArchive(CommonTree ast, boolean isUnArchive)
+ throws SemanticException {
+
+ if (!conf.getBoolVar(HiveConf.ConfVars.HIVEARCHIVEENABLED)) {
+ throw new SemanticException(ErrorMsg.ARCHIVE_METHODS_DISABLED.getMsg());
+
+ }
+ String tblName = unescapeIdentifier(ast.getChild(0).getText());
+ // partition name to value
+ List<Map<String, String>> partSpecs = getPartitionSpecs(ast);
+ if (partSpecs.size() > 1 ) {
+ throw new SemanticException(isUnArchive ?
+ ErrorMsg.UNARCHIVE_ON_MULI_PARTS.getMsg() :
+ ErrorMsg.ARCHIVE_ON_MULI_PARTS.getMsg());
+ }
+ if (partSpecs.size() == 0) {
+ throw new SemanticException(ErrorMsg.ARCHIVE_ON_TABLE.getMsg());
+ }
+
+ Map<String,String> partSpec = partSpecs.get(0);
+ AlterTableSimpleDesc archiveDesc = new AlterTableSimpleDesc(
+ MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, partSpec,
+ (isUnArchive ? AlterTableTypes.UNARCHIVE : AlterTableTypes.ARCHIVE));
+ rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(),
+ archiveDesc), conf));
+
+ }
+
/**
* Verify that the information in the metastore matches up with the data on
* the fs.
@@ -667,4 +715,25 @@ public class DDLSemanticAnalyzer extends
}
return partSpecs;
}
+
+ /**
+ * Certain partition values are used by Hive, e.g. the default partition
+ * in dynamic partitioning and the intermediate partition values used in the
+ * archiving process. Naturally, we prohibit the user from creating partitions
+ * with these reserved values. The check that this function performs is more
+ * restrictive than the actual limitation, but it's simpler. Should be okay
+ * since the reserved names are fairly long and uncommon.
+ */
+ private void validatePartitionValues(Map<String, String> partSpec)
+ throws SemanticException {
+
+ for (Entry<String, String> e : partSpec.entrySet()) {
+ for (String s : reservedPartitionValues) {
+ if (e.getValue().contains(s)) {
+ throw new SemanticException(ErrorMsg.RESERVED_PART_VAL.getMsg(
+ "(User value: " + e.getValue() + " Reserved substring: " + s + ")"));
+ }
+ }
+ }
+ }
}
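
With the default configuration values registered above, a partition value merely containing one of the reserved suffixes is rejected. A hypothetical example:

    // ALTER TABLE t ADD PARTITION (ds='2010_INTERMEDIATE_ORIGINAL')
    // fails in validatePartitionValues() with RESERVED_PART_VAL, because the
    // value contains the reserved substring "_INTERMEDIATE_ORIGINAL".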
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ErrorMsg.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ErrorMsg.java?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ErrorMsg.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ErrorMsg.java Wed Jun 9 01:37:48 2010
@@ -149,7 +149,16 @@ public enum ErrorMsg {
UNSUPPORTED_TYPE("DATE, DATETIME, and TIMESTAMP types aren't supported yet. Please use "
+ "STRING instead."),
CREATE_NON_NATIVE_AS("CREATE TABLE AS SELECT cannot be used for a non-native table"),
- LOAD_INTO_NON_NATIVE("A non-native table cannot be used as target for LOAD");
+ LOAD_INTO_NON_NATIVE("A non-native table cannot be used as target for LOAD"),
+ OVERWRITE_ARCHIVED_PART("Cannot overwrite an archived partition. " +
+ "Unarchive before running this command."),
+ ARCHIVE_METHODS_DISABLED("Archiving methods are currently disabled. " +
+ "Please see the Hive wiki for more information about enabling archiving."),
+ ARCHIVE_ON_MULI_PARTS("ARCHIVE can only be run on a single partition"),
UNARCHIVE_ON_MULI_PARTS("UNARCHIVE can only be run on a single partition"),
+ ARCHIVE_ON_TABLE("ARCHIVE can only be run on partitions"),
+ RESERVED_PART_VAL("Partition value contains a reserved substring"),
+ ;
private String mesg;
private String sqlState;
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g Wed Jun 9 01:37:48 2010
@@ -95,6 +95,8 @@ TOK_ALTERTABLE_REPLACECOLS;
TOK_ALTERTABLE_ADDPARTS;
TOK_ALTERTABLE_DROPPARTS;
TOK_ALTERTABLE_TOUCH;
+TOK_ALTERTABLE_ARCHIVE;
+TOK_ALTERTABLE_UNARCHIVE;
TOK_ALTERTABLE_SERDEPROPERTIES;
TOK_ALTERTABLE_SERIALIZER;
TOK_ALTERTABLE_FILEFORMAT;
@@ -281,6 +283,8 @@ alterTableStatementSuffix
| alterStatementSuffixDropPartitions
| alterStatementSuffixAddPartitions
| alterStatementSuffixTouch
+ | alterStatementSuffixArchive
+ | alterStatementSuffixUnArchive
| alterStatementSuffixProperties
| alterStatementSuffixSerdeProperties
| alterStatementSuffixFileFormat
@@ -335,6 +339,20 @@ alterStatementSuffixTouch
-> ^(TOK_ALTERTABLE_TOUCH Identifier (partitionSpec)*)
;
+alterStatementSuffixArchive
+@init { msgs.push("archive statement"); }
+@after { msgs.pop(); }
+ : Identifier KW_ARCHIVE (partitionSpec)*
+ -> ^(TOK_ALTERTABLE_ARCHIVE Identifier (partitionSpec)*)
+ ;
+
+alterStatementSuffixUnArchive
+@init { msgs.push("unarchive statement"); }
+@after { msgs.pop(); }
+ : Identifier KW_UNARCHIVE (partitionSpec)*
+ -> ^(TOK_ALTERTABLE_UNARCHIVE Identifier (partitionSpec)*)
+ ;
+
partitionLocation
@init { msgs.push("partition location"); }
@after { msgs.pop(); }
@@ -1625,6 +1643,8 @@ KW_RECORDWRITER: 'RECORDWRITER';
KW_SEMI: 'SEMI';
KW_LATERAL: 'LATERAL';
KW_TOUCH: 'TOUCH';
+KW_ARCHIVE: 'ARCHIVE';
+KW_UNARCHIVE: 'UNARCHIVE';
// Operators
// NOTE: if you add a new function/operator, add it to sysFuncNames so that describe function _FUNC_ will work.
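
A quick way to sanity-check the new grammar rules is to parse an ARCHIVE
statement and dump the AST. The sketch below uses this codebase's ParseDriver,
assuming its parse(String) entry point; per the rewrite rule above, the root
token should come back as TOK_ALTERTABLE_ARCHIVE.

import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.ParseDriver;

public class ArchiveParseCheck {
  public static void main(String[] args) throws Exception {
    ParseDriver pd = new ParseDriver();
    // Expected to yield a TOK_ALTERTABLE_ARCHIVE root with the table
    // identifier and one partitionSpec child.
    ASTNode tree = pd.parse(
        "ALTER TABLE srcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12')");
    System.out.println(tree.toStringTree());
  }
}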
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Wed Jun 9 01:37:48 2010
@@ -3253,6 +3253,10 @@ public class SemanticAnalyzer extends Ba
dest_tab = dest_part.getTable();
dest_path = dest_part.getPath()[0];
+ if ("har".equalsIgnoreCase(dest_path.toUri().getScheme())) {
+ throw new SemanticException(ErrorMsg.OVERWRITE_ARCHIVED_PART
+ .getMsg());
+ }
queryTmpdir = ctx.getExternalTmpFileURI(dest_path.toUri());
table_desc = Utilities.getTableDesc(dest_tab);
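
The guard above keys off the URI scheme of the destination path: once a
partition is archived, its location points inside a Hadoop archive, and such
paths carry the "har" scheme. A small sketch of the same test using the
standard org.apache.hadoop.fs.Path API; the example paths are made up.

import org.apache.hadoop.fs.Path;

public class HarSchemeCheck {
  public static void main(String[] args) {
    Path archived = new Path(
        "har:///user/hive/warehouse/srcpart/ds=2008-04-08/hr=12/data.har");
    Path plain = new Path(
        "hdfs:///user/hive/warehouse/srcpart/ds=2008-04-08/hr=11");
    // Prints true, then false.
    System.out.println("har".equalsIgnoreCase(archived.toUri().getScheme()));
    System.out.println("har".equalsIgnoreCase(plain.toUri().getScheme()));
  }
}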
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java Wed Jun 9 01:37:48 2010
@@ -46,6 +46,8 @@ public final class SemanticAnalyzerFacto
commandType.put(HiveParser.TOK_ALTERTABLE_DROPPARTS, "ALTERTABLE_DROPPARTS");
commandType.put(HiveParser.TOK_ALTERTABLE_ADDPARTS, "ALTERTABLE_ADDPARTS");
commandType.put(HiveParser.TOK_ALTERTABLE_TOUCH, "ALTERTABLE_TOUCH");
+ commandType.put(HiveParser.TOK_ALTERTABLE_ARCHIVE, "ALTERTABLE_ARCHIVE");
+ commandType.put(HiveParser.TOK_ALTERTABLE_UNARCHIVE, "ALTERTABLE_UNARCHIVE");
commandType.put(HiveParser.TOK_ALTERTABLE_PROPERTIES, "ALTERTABLE_PROPERTIES");
commandType.put(HiveParser.TOK_ALTERTABLE_SERIALIZER, "ALTERTABLE_SERIALIZER");
commandType.put(HiveParser.TOK_ALTERTABLE_SERDEPROPERTIES, "ALTERTABLE_SERDEPROPERTIES");
@@ -98,6 +100,8 @@ public final class SemanticAnalyzerFacto
case HiveParser.TOK_ALTERTABLE_FILEFORMAT:
case HiveParser.TOK_ALTERTABLE_CLUSTER_SORT:
case HiveParser.TOK_ALTERTABLE_TOUCH:
+ case HiveParser.TOK_ALTERTABLE_ARCHIVE:
+ case HiveParser.TOK_ALTERTABLE_UNARCHIVE:
return new DDLSemanticAnalyzer(conf);
case HiveParser.TOK_CREATEFUNCTION:
case HiveParser.TOK_DROPFUNCTION:
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java Wed Jun 9 01:37:48 2010
@@ -42,7 +42,7 @@ public class AlterTableDesc extends DDLD
public static enum AlterTableTypes {
RENAME, ADDCOLS, REPLACECOLS, ADDPROPS, ADDSERDE, ADDSERDEPROPS,
ADDFILEFORMAT, ADDCLUSTERSORTCOLUMN, RENAMECOLUMN, ADDPARTITION,
- TOUCH
+ TOUCH, ARCHIVE, UNARCHIVE,
};
AlterTableTypes op;
Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableSimpleDesc.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableSimpleDesc.java?rev=952877&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableSimpleDesc.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableSimpleDesc.java Wed Jun 9 01:37:48 2010
@@ -0,0 +1,92 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.plan;
+
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+import org.apache.hadoop.hive.ql.plan.AlterTableDesc.AlterTableTypes;
+
+/**
+ * Contains information needed to modify a partition or a table.
+ */
+public class AlterTableSimpleDesc extends DDLDesc {
+ private String tableName;
+ private String dbName;
+ private LinkedHashMap<String, String> partSpec;
+
+ AlterTableTypes type;
+
+ public AlterTableSimpleDesc() {
+ }
+
+ /**
+ * @param dbName
+ * database that contains the table / partition
+ * @param tableName
+ * table containing the partition
+ * @param partSpec
+ * partition specification. Null if the operation applies to the table.
+ */
+ public AlterTableSimpleDesc(String dbName, String tableName,
+ Map<String, String> partSpec, AlterTableDesc.AlterTableTypes type) {
+ super();
+ this.dbName = dbName;
+ this.tableName = tableName;
+ if (partSpec == null) {
+ this.partSpec = null;
+ } else {
+ this.partSpec = new LinkedHashMap<String,String>(partSpec);
+ }
+ this.type = type;
+ }
+
+ public String getTableName() {
+ return tableName;
+ }
+
+ public void setTableName(String tableName) {
+ this.tableName = tableName;
+ }
+
+ public String getDbName() {
+ return dbName;
+ }
+
+ public void setDbName(String dbName) {
+ this.dbName = dbName;
+ }
+
+ public AlterTableDesc.AlterTableTypes getType() {
+ return type;
+ }
+
+ public void setType(AlterTableDesc.AlterTableTypes type) {
+ this.type = type;
+ }
+
+ public LinkedHashMap<String, String> getPartSpec() {
+ return partSpec;
+ }
+
+ public void setPartSpec(LinkedHashMap<String, String> partSpec) {
+ this.partSpec = partSpec;
+ }
+
+}
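
For reference, a condensed sketch of how the analyzer change earlier in this
patch builds one of these descriptors for a single-partition ARCHIVE; the
table name and partition values here are illustrative.

import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.hadoop.hive.metastore.MetaStoreUtils;
import org.apache.hadoop.hive.ql.plan.AlterTableDesc.AlterTableTypes;
import org.apache.hadoop.hive.ql.plan.AlterTableSimpleDesc;

public class BuildArchiveDesc {
  public static void main(String[] args) {
    Map<String, String> partSpec = new LinkedHashMap<String, String>();
    partSpec.put("ds", "2008-04-08");
    partSpec.put("hr", "12");

    AlterTableSimpleDesc desc = new AlterTableSimpleDesc(
        MetaStoreUtils.DEFAULT_DATABASE_NAME, "srcpart", partSpec,
        AlterTableTypes.ARCHIVE);
    // The analyzer wraps this in a DDLWork and hands it to TaskFactory.
    System.out.println(desc.getTableName() + " " + desc.getPartSpec());
  }
}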
Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveDesc.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveDesc.java?rev=952877&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveDesc.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveDesc.java Wed Jun 9 01:37:48 2010
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.plan;
+
+/**
+ * ArchiveDesc.
+ *
+ */
+public class ArchiveDesc extends DDLDesc {
+
+}
Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveWork.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveWork.java?rev=952877&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveWork.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveWork.java Wed Jun 9 01:37:48 2010
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.plan;
+
+import java.io.Serializable;
+import java.util.LinkedHashMap;
+
+/**
+ * ArchiveWork.
+ *
+ */
+@Explain(displayName = "Map Reduce")
+public class ArchiveWork implements Serializable {
+ private static final long serialVersionUID = 1L;
+ private String tableName;
+ private String dbName;
+ private LinkedHashMap<String, String> partSpec;
+ private ArchiveActionType type;
+
+ public static enum ArchiveActionType {
+ ARCHIVE, UNARCHIVE
+ };
+
+
+ public ArchiveActionType getType() {
+ return type;
+ }
+
+ public void setType(ArchiveActionType type) {
+ this.type = type;
+ }
+
+ public ArchiveWork() {
+ }
+
+}
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java Wed Jun 9 01:37:48 2010
@@ -42,7 +42,7 @@ public class DDLWork implements Serializ
private ShowPartitionsDesc showPartsDesc;
private DescTableDesc descTblDesc;
private AddPartitionDesc addPartitionDesc;
- private TouchDesc touchDesc;
+ private AlterTableSimpleDesc alterTblSimpleDesc;
private MsckDesc msckDesc;
private ShowTableStatusDesc showTblStatusDesc;
@@ -183,10 +183,10 @@ public class DDLWork implements Serializ
* information about the table/partitions that we want to touch
*/
public DDLWork(HashSet<ReadEntity> inputs, HashSet<WriteEntity> outputs,
- TouchDesc touchDesc) {
+ AlterTableSimpleDesc simpleDesc) {
this(inputs, outputs);
- this.touchDesc = touchDesc;
+ this.alterTblSimpleDesc = simpleDesc;
}
public DDLWork(HashSet<ReadEntity> inputs, HashSet<WriteEntity> outputs,
@@ -383,18 +383,18 @@ public class DDLWork implements Serializ
}
/**
- * @return information about the table/partitionss we want to touch.
+ * @return information about the table/partitions we want to alter.
*/
- public TouchDesc getTouchDesc() {
- return touchDesc;
+ public AlterTableSimpleDesc getAlterTblSimpleDesc() {
+ return alterTblSimpleDesc;
}
/**
- * @param touchDesc
- * information about the table/partitions we want to touch.
+ * @param desc
+ * information about the table/partitions we want to alter.
*/
- public void setTouchDesc(TouchDesc touchDesc) {
- this.touchDesc = touchDesc;
+ public void setAlterTblSimpleDesc(AlterTableSimpleDesc desc) {
+ this.alterTblSimpleDesc = desc;
}
/**
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/TouchDesc.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/TouchDesc.java?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/TouchDesc.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/TouchDesc.java Wed Jun 9 01:37:48 2010
@@ -1,81 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.plan;
-
-import java.util.LinkedHashMap;
-import java.util.Map;
-
-/**
- * Contains information needed to touch a partition (cause pre/post hooks to
- * fire).
- */
-public class TouchDesc extends DDLDesc {
- private String tableName;
- private String dbName;
- private LinkedHashMap<String, String> partSpec;
-
-
- public TouchDesc() {
- }
-
- /**
- * @param dbName
- * database that contains the table / partition
- * @param tableName
- * table containing the partition
- * @param partSpec
- * partition specification. Null if touching a table.
- */
- public TouchDesc(String dbName, String tableName,
- Map<String, String> partSpec) {
- super();
- this.dbName = dbName;
- this.tableName = tableName;
- if(partSpec == null) {
- this.partSpec = null;
- } else {
- this.partSpec = new LinkedHashMap<String,String>(partSpec);
- }
- }
-
- public String getTableName() {
- return tableName;
- }
-
- public void setTableName(String tableName) {
- this.tableName = tableName;
- }
-
- public String getDbName() {
- return dbName;
- }
-
- public void setDbName(String dbName) {
- this.dbName = dbName;
- }
-
- public LinkedHashMap<String, String> getPartSpec() {
- return partSpec;
- }
-
- public void setPartSpec(LinkedHashMap<String, String> partSpec) {
- this.partSpec = partSpec;
- }
-
-}
Modified: hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java (original)
+++ hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java Wed Jun 9 01:37:48 2010
@@ -29,8 +29,10 @@ import java.io.Serializable;
import java.net.URI;
import java.util.ArrayList;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
+import java.util.Set;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -80,6 +82,7 @@ public class QTestUtil {
private final String outDir;
private final String logDir;
private final TreeMap<String, String> qMap;
+ private final Set<String> qSkipSet;
private final LinkedList<String> srcTables;
private ParseDriver pd;
@@ -173,6 +176,7 @@ public class QTestUtil {
conf = new HiveConf(Driver.class);
this.miniMr = miniMr;
qMap = new TreeMap<String, String>();
+ qSkipSet = new HashSet<String>();
if (miniMr) {
dfs = ShimLoader.getHadoopShims().getMiniDfs(conf, 4, true, null);
@@ -230,12 +234,44 @@ public class QTestUtil {
DataInputStream dis = new DataInputStream(bis);
StringBuilder qsb = new StringBuilder();
+ // Look for a hint to not run a test on some Hadoop versions
+ Pattern pattern = Pattern.compile("-- EXCLUDE_HADOOP_MAJOR_VERSIONS(.*)");
+
+
// Read the entire query
+ boolean excludeQuery = false;
+ String hadoopVer = ShimLoader.getMajorVersion();
while (dis.available() != 0) {
- qsb.append(dis.readLine() + "\n");
+ String line = dis.readLine();
+
+ // While we are reading the lines, detect whether this query wants to be
+ // excluded from running because the Hadoop version is incorrect
+ Matcher matcher = pattern.matcher(line);
+ if (matcher.find()) {
+ String group = matcher.group();
+ int start = group.indexOf('(');
+ int end = group.indexOf(')');
+ assert end > start;
+ // versions might be something like '0.17, 0.19'
+ String versions = group.substring(start+1, end);
+
+ Set<String> excludedVersionSet = new HashSet<String>();
+ for (String s : versions.split("\\,")) {
+ s = s.trim();
+ excludedVersionSet.add(s);
+ }
+ if (excludedVersionSet.contains(hadoopVer)) {
+ excludeQuery = true;
+ }
+ }
+ qsb.append(line + "\n");
}
qMap.put(qf.getName(), qsb.toString());
-
+ if (excludeQuery) {
+ System.out.println("Due to the Hadoop version (" + hadoopVer + "), " +
+ "adding query " + qf.getName() + " to the set of tests to skip");
+ qSkipSet.add(qf.getName());
+ }
dis.close();
}
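
The marker the loop above looks for is a plain comment line of the form
"-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.19)", as used by the archive tests
below. A standalone sketch of the same extraction, pure JDK:

import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class ExcludeMarker {
  static Set<String> excludedVersions(String line) {
    Set<String> out = new HashSet<String>();
    Matcher m = Pattern.compile(
        "-- EXCLUDE_HADOOP_MAJOR_VERSIONS\\((.*)\\)").matcher(line);
    if (m.find()) {
      for (String s : m.group(1).split(",")) {
        out.add(s.trim());
      }
    }
    return out;
  }

  public static void main(String[] args) {
    // Prints the excluded versions, e.g. [0.19, 0.17, 0.18].
    System.out.println(excludedVersions(
        "-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19)"));
  }
}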
@@ -504,6 +540,10 @@ public class QTestUtil {
return cliDriver.processLine(qMap.get(tname));
}
+ public boolean shouldBeSkipped(String tname) {
+ return qSkipSet.contains(tname);
+ }
+
public void convertSequenceFileToTextFile() throws Exception {
// Create an instance of hive in order to create the tables
testWarehouse = conf.getVar(HiveConf.ConfVars.METASTOREWAREHOUSE);
@@ -745,6 +785,7 @@ public class QTestUtil {
public int checkCliDriverResults(String tname) throws Exception {
String[] cmdArray;
+ assert(qMap.containsKey(tname));
cmdArray = new String[] {
"diff", "-a",
@@ -816,7 +857,7 @@ public class QTestUtil {
/**
* QTRunner: Runnable class for running a a single query file.
- *
+ *
**/
public static class QTRunner implements Runnable {
private final QTestUtil qt;
@@ -845,7 +886,7 @@ public class QTestUtil {
/**
* executes a set of query files either in sequence or in parallel. Uses
* QTestUtil to do so
- *
+ *
* @param qfiles
* array of input query files containing arbitrary number of hive
* queries
@@ -855,7 +896,7 @@ public class QTestUtil {
* @param mt
* whether to run in multithreaded mode or not
* @return true if all the query files were executed successfully, else false
- *
+ *
* In multithreaded mode each query file is run in a separate thread.
* the caller has to arrange that different query files do not collide
* (in terms of destination tables)
@@ -923,7 +964,7 @@ public class QTestUtil {
}
return (!failed);
}
-
+
public static void outputTestFailureHelpMessage() {
System.err.println("See build/ql/tmp/hive.log, "
+ "or try \"ant test ... -Dtest.silent=false\" to get more logs.");
Added: hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive1.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive1.q?rev=952877&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive1.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive1.q Wed Jun 9 01:37:48 2010
@@ -0,0 +1,11 @@
+set hive.archive.enabled = true;
+-- Tests trying to archive a partition twice.
+-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19)
+
+CREATE TABLE srcpart_archived LIKE srcpart;
+
+INSERT OVERWRITE TABLE srcpart_archived PARTITION (ds='2008-04-08', hr='12')
+SELECT key, value FROM srcpart WHERE ds='2008-04-08' AND hr='12';
+
+ALTER TABLE srcpart_archived ARCHIVE PARTITION (ds='2008-04-08', hr='12');
+ALTER TABLE srcpart_archived ARCHIVE PARTITION (ds='2008-04-08', hr='12');
Added: hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive2.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive2.q?rev=952877&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive2.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive2.q Wed Jun 9 01:37:48 2010
@@ -0,0 +1,5 @@
+set hive.archive.enabled = true;
+-- Tests trying to unarchive a non-archived partition
+-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19)
+
+ALTER TABLE srcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12');
Added: hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive3.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive3.q?rev=952877&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive3.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive3.q Wed Jun 9 01:37:48 2010
@@ -0,0 +1,5 @@
+set hive.archive.enabled = true;
+-- Tests archiving a table
+-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19)
+
+ALTER TABLE srcpart ARCHIVE;
Added: hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive4.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive4.q?rev=952877&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive4.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive4.q Wed Jun 9 01:37:48 2010
@@ -0,0 +1,5 @@
+set hive.archive.enabled = true;
+-- Tests archiving multiple partitions
+-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19)
+
+ALTER TABLE srcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12') PARTITION (ds='2008-04-08', hr='11');
Added: hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive5.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive5.q?rev=952877&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive5.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive5.q Wed Jun 9 01:37:48 2010
@@ -0,0 +1,5 @@
+set hive.archive.enabled = true;
+-- Tests creating a partition where the partition value will collide with
+-- an intermediate directory name used by the archiving process
+
+ALTER TABLE srcpart ADD PARTITION (ds='2008-04-08', hr='14_INTERMEDIATE_ORIGINAL');
Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/archive.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/archive.q?rev=952877&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/archive.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/archive.q Wed Jun 9 01:37:48 2010
@@ -0,0 +1,44 @@
+set hive.archive.enabled = true;
+set hive.enforce.bucketing = true;
+
+-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19)
+
+SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col
+FROM (SELECT * FROM srcpart WHERE ds='2008-04-08') subq1) subq2;
+
+ALTER TABLE srcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12');
+
+SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col
+FROM (SELECT * FROM srcpart WHERE ds='2008-04-08') subq1) subq2;
+
+ALTER TABLE srcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12');
+
+SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col
+FROM (SELECT * FROM srcpart WHERE ds='2008-04-08') subq1) subq2;
+
+CREATE TABLE harbucket(key INT)
+PARTITIONED by (ds STRING)
+CLUSTERED BY (key) INTO 10 BUCKETS;
+
+INSERT OVERWRITE TABLE harbucket PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM src WHERE key < 50;
+
+SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key;
+ALTER TABLE srcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12');
+SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key;
+ALTER TABLE srcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12');
+SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key;
+
+DROP TABLE harbucket;
+
+CREATE TABLE old_name(key INT)
+PARTITIONED by (ds STRING);
+
+INSERT OVERWRITE TABLE old_name PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM src WHERE key < 50;
+ALTER TABLE old_name ARCHIVE PARTITION (ds='1');
+SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col
+FROM (SELECT * FROM old_name WHERE ds='1') subq1) subq2;
+ALTER TABLE old_name RENAME TO new_name;
+SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col
+FROM (SELECT * FROM new_name WHERE ds='1') subq1) subq2;
+
+DROP TABLE new_name;