You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by pa...@apache.org on 2011/08/10 00:04:22 UTC
svn commit: r1155573 - in /hive/trunk/metastore: scripts/upgrade/derby/
scripts/upgrade/mysql/ src/java/org/apache/hadoop/hive/metastore/
src/model/ src/model/org/apache/hadoop/hive/metastore/model/
Author: pauly
Date: Tue Aug 9 22:04:20 2011
New Revision: 1155573
URL: http://svn.apache.org/viewvc?rev=1155573&view=rev
Log:
HIVE-2246. Dedupe tables' column schemas from partitions in the metastore db (Sohan Jain via pauly)
Added:
hive/trunk/metastore/scripts/upgrade/derby/008-HIVE-2246.derby.sql
hive/trunk/metastore/scripts/upgrade/derby/008-REVERT-HIVE-2246.derby.sql
hive/trunk/metastore/scripts/upgrade/mysql/008-HIVE-2246.mysql.sql
hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MColumnDescriptor.java
Modified:
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MStorageDescriptor.java
hive/trunk/metastore/src/model/package.jdo
Added: hive/trunk/metastore/scripts/upgrade/derby/008-HIVE-2246.derby.sql
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/scripts/upgrade/derby/008-HIVE-2246.derby.sql?rev=1155573&view=auto
==============================================================================
--- hive/trunk/metastore/scripts/upgrade/derby/008-HIVE-2246.derby.sql (added)
+++ hive/trunk/metastore/scripts/upgrade/derby/008-HIVE-2246.derby.sql Tue Aug 9 22:04:20 2011
@@ -0,0 +1,93 @@
+/*
+ * Creates the following tables:
+ * - CDS
+ * - COLUMNS_V2
+ * The new columns table is called COLUMNS_V2
+ * because many columns are removed, and the schema is changed.
+ * It'd take too long to migrate and keep the same table.
+ */
+CREATE TABLE "CDS" (
+ "CD_ID" bigint NOT NULL,
+ PRIMARY KEY ("CD_ID")
+);
+
+CREATE TABLE "COLUMNS_V2" (
+ "CD_ID" bigint NOT NULL,
+ "COMMENT" varchar(4000),
+ "COLUMN_NAME" varchar(128) NOT NULL,
+ "TYPE_NAME" varchar(4000),
+ "INTEGER_IDX" INTEGER NOT NULL,
+ PRIMARY KEY ("CD_ID", "COLUMN_NAME")
+);
+
+ALTER TABLE "COLUMNS_V2"
+ ADD CONSTRAINT "COLUMNS_V2_FK1"
+ FOREIGN KEY ("CD_ID") REFERENCES "CDS" ("CD_ID")
+ ON DELETE NO ACTION ON UPDATE NO ACTION
+;
+
+/* Alter the SDS table to:
+ * - add the column CD_ID
+ * - add a foreign key on CD_ID
+ * - create an index on CD_ID
+ */
+ALTER TABLE SDS
+ ADD COLUMN "CD_ID" bigint
+;
+ALTER TABLE SDS
+ ADD CONSTRAINT "SDS_FK2"
+ FOREIGN KEY ("CD_ID") REFERENCES "CDS" ("CD_ID")
+;
+
+/*
+ * Migrate the TBLS table
+ * Add entries into CDS.
+ * Populate the CD_ID field in SDS for tables
+ * Add entires to COLUMNS_V2 based on this table's sd's columns
+ */
+
+/* In the migration, there is a 1:1 mapping between CD_ID and SD_ID
+ * for tables. For speed, just let CD_ID = SD_ID for tables
+ */
+INSERT INTO CDS (CD_ID)
+SELECT t.SD_ID FROM TBLS t WHERE t.SD_ID IS NOT NULL ORDER BY t.SD_ID;
+
+UPDATE SDS
+ SET CD_ID = SD_ID
+WHERE SD_ID in
+(SELECT t.SD_ID FROM TBLS t WHERE t.SD_ID IS NOT NULL ORDER BY t.SD_ID);
+
+INSERT INTO COLUMNS_V2
+ (CD_ID, COMMENT, COLUMN_NAME, TYPE_NAME, INTEGER_IDX)
+SELECT
+ c.SD_ID, c.COMMENT, c.COLUMN_NAME, c.TYPE_NAME, c.INTEGER_IDX
+FROM
+ COLUMNS c
+JOIN
+ TBLS t
+ON
+ t.SD_ID = c.SD_ID
+;
+
+/*
+ * Migrate the partitions.
+ * Update the partitions' SDS to use the parent tables' CD_ID BEGIN
+ * Derby does not allow joins in update statements,
+ * so we have to make a temporary tableh
+ */
+DECLARE GLOBAL TEMPORARY TABLE "TMP_TBL" (
+ "SD_ID" bigint not null,
+ "CD_ID" bigint not null
+) ON COMMIT PRESERVE ROWS NOT LOGGED;
+
+INSERT INTO "SESSION"."TMP_TBL" SELECT
+ p.SD_ID, sds.CD_ID
+ FROM PARTITIONS p
+ JOIN TBLS t ON t.TBL_ID = p.TBL_ID
+ JOIN SDS sds on t.SD_ID = sds.SD_ID
+ WHERE p.SD_ID IS NOT NULL;
+
+UPDATE SDS sd
+ SET sd.CD_ID =
+ (SELECT tt.CD_ID FROM SESSION.TMP_TBL tt WHERE tt.SD_ID = sd.SD_ID)
+ WHERE sd.SD_ID IN (SELECT SD_ID FROM SESSION.TMP_TBL);
Added: hive/trunk/metastore/scripts/upgrade/derby/008-REVERT-HIVE-2246.derby.sql
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/scripts/upgrade/derby/008-REVERT-HIVE-2246.derby.sql?rev=1155573&view=auto
==============================================================================
--- hive/trunk/metastore/scripts/upgrade/derby/008-REVERT-HIVE-2246.derby.sql (added)
+++ hive/trunk/metastore/scripts/upgrade/derby/008-REVERT-HIVE-2246.derby.sql Tue Aug 9 22:04:20 2011
@@ -0,0 +1,13 @@
+/*
+ * Remove the CD_ID column from SDS
+ * Delete the CDS table
+ * Delete the COLUMNS_V2 table
+ */
+
+ALTER TABLE SDS DROP CONSTRAINT SDS_FK2;
+
+ALTER TABLE SDS DROP COLUMN CD_ID;
+
+DROP TABLE COLUMNS_V2;
+
+DROP TABLE CDS;
Added: hive/trunk/metastore/scripts/upgrade/mysql/008-HIVE-2246.mysql.sql
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/scripts/upgrade/mysql/008-HIVE-2246.mysql.sql?rev=1155573&view=auto
==============================================================================
--- hive/trunk/metastore/scripts/upgrade/mysql/008-HIVE-2246.mysql.sql (added)
+++ hive/trunk/metastore/scripts/upgrade/mysql/008-HIVE-2246.mysql.sql Tue Aug 9 22:04:20 2011
@@ -0,0 +1,206 @@
+DELIMITER $$
+DROP PROCEDURE IF EXISTS REVERT $$
+DROP PROCEDURE IF EXISTS ALTER_SDS $$
+DROP PROCEDURE IF EXISTS CREATE_SDS $$
+DROP PROCEDURE IF EXISTS CREATE_TABLES $$
+DROP PROCEDURE IF EXISTS MIGRATE_TABLES $$
+DROP PROCEDURE IF EXISTS MIGRATE_PARTITIONS $$
+DROP PROCEDURE IF EXISTS MIGRATE $$
+DROP PROCEDURE IF EXISTS PRE_MIGRATE $$
+DROP PROCEDURE IF EXISTS RENAME_TMP_COLUMNS $$
+DROP PROCEDURE IF EXISTS CREATE_TABLE_SDS $$
+
+/* Call this procedure to revert all changes by this script */
+CREATE PROCEDURE REVERT()
+ BEGIN
+ ALTER TABLE SDS
+ DROP FOREIGN KEY `SDS_FK2`
+ ;
+ ALTER TABLE SDS
+ DROP COLUMN CD_ID
+ ;
+ DROP TABLE IF EXISTS COLUMNS_V2;
+ DROP TABLE IF EXISTS TABLE_SDS;
+ DROP TABLE IF EXISTS CDS;
+
+ END $$
+
+/* Alter the SDS table to:
+ * - add the column CD_ID
+ * - add a foreign key on CD_ID
+ * - create an index on CD_ID
+ */
+CREATE PROCEDURE ALTER_SDS()
+ BEGIN
+ ALTER TABLE SDS
+ ADD COLUMN CD_ID bigint(20) NULL
+ AFTER SD_ID
+ ;
+ SELECT 'Added the column CD_ID to SD_ID';
+ ALTER TABLE SDS
+ ADD CONSTRAINT `SDS_FK2`
+ FOREIGN KEY (`CD_ID`) REFERENCES `CDS` (`CD_ID`)
+ ;
+ SELECT 'Created a FK Constraint on CD_ID in SDS';
+ CREATE INDEX `SDS_N50` ON SDS
+ (CD_ID)
+ ;
+ SELECT 'Added an index on CD_ID in SDS';
+ END $$
+
+/*
+ * Creates the following tables:
+ * - CDS
+ * - COLUMNS_V2
+ * The new columns table is called COLUMNS_V2
+ * because many columns are removed, and the schema is changed.
+ * It'd take too long to migrate and keep the same table.
+ */
+CREATE PROCEDURE CREATE_TABLES()
+ BEGIN
+ CREATE TABLE IF NOT EXISTS `CDS` (
+ `CD_ID` bigint(20) NOT NULL,
+ PRIMARY KEY (`CD_ID`)
+ ) ENGINE=InnoDB DEFAULT CHARSET=latin1
+ ;
+
+ CREATE TABLE IF NOT EXISTS `COLUMNS_V2` (
+ `CD_ID` bigint(20) NOT NULL,
+ `COMMENT` varchar(256) CHARACTER SET latin1 COLLATE latin1_bin DEFAULT NULL,
+ `COLUMN_NAME` varchar(128) CHARACTER SET latin1 COLLATE latin1_bin NOT NULL,
+ `TYPE_NAME` varchar(4000) DEFAULT NULL,
+ `INTEGER_IDX` int(11) NOT NULL,
+ PRIMARY KEY (`CD_ID`,`COLUMN_NAME`),
+ KEY `COLUMNS_V2_N49` (`CD_ID`),
+ CONSTRAINT `COLUMNS_V2_FK1` FOREIGN KEY (`CD_ID`) REFERENCES `CDS` (`CD_ID`)
+ ) ENGINE=InnoDB DEFAULT CHARSET=latin1
+ ;
+ END $$
+
+/*
+ * Procedures called before migration happens
+ */
+CREATE PROCEDURE PRE_MIGRATE()
+ BEGIN
+ call CREATE_TABLES();
+ SELECT 'Created tables';
+ call CREATE_TABLE_SDS();
+ SELECT 'Created the temp table TABLE_SDS';
+ call ALTER_SDS();
+ SELECT 'Altered the SDS table';
+ END $$
+
+/*
+ * Migrate the TBLS table
+ * Add entries into CDS.
+ * Populate the CD_ID field in SDS for tables
+ * Add entires to COLUMNS_V2 based on this table's sd's columns
+ */
+CREATE PROCEDURE MIGRATE_TABLES()
+ BEGIN
+ /* In the migration, there is a 1:1 mapping between CD_ID and SD_ID
+ * for tables. For speed, just let CD_ID = SD_ID for tables
+ */
+ INSERT INTO CDS (CD_ID)
+ SELECT SD_ID FROM TABLE_SDS;
+ SELECT 'Inserted into CDS';
+
+ UPDATE SDS
+ SET CD_ID = SD_ID
+ WHERE SD_ID in
+ (select SD_ID from TABLE_SDS);
+ SELECT 'Updated CD_ID in SDS';
+
+ INSERT INTO COLUMNS_V2
+ (CD_ID, COMMENT, COLUMN_NAME, TYPE_NAME, INTEGER_IDX)
+ SELECT
+ c.SD_ID, c.COMMENT, c.COLUMN_NAME, c.TYPE_NAME, c.INTEGER_IDX
+ FROM
+ COLUMNS c
+ JOIN
+ TBLS t
+ ON
+ t.SD_ID = c.SD_ID
+ ;
+ SELECT 'Inserted table columns into COLUMNS_V2';
+ END $$
+
+/*
+ * Migrate the partitions.
+ * Update the partition's SDS to use the parent table's CD_ID
+ */
+CREATE PROCEDURE MIGRATE_PARTITIONS()
+ BEGIN
+ UPDATE SDS sd
+ JOIN PARTITIONS p on p.SD_ID = sd.SD_ID
+ JOIN TBLS t on t.TBL_ID = p.TBL_ID
+ SET sd.CD_ID = t.SD_ID
+ where p.SD_ID is not null
+ ;
+ SELECT 'Updated CD_IDs in SDS for partitions'
+
+ END $$
+
+/*
+ * Create a temp table that holds the SDS of tables
+ */
+CREATE PROCEDURE CREATE_TABLE_SDS()
+ BEGIN
+ CREATE TEMPORARY TABLE `TABLE_SDS` (
+ `SD_ID` bigint(20) NOT NULL,
+ PRIMARY KEY (`SD_ID`)
+ ) ENGINE=InnoDB DEFAULT CHARSET=latin1
+ ;
+ INSERT INTO TABLE_SDS
+ (SD_ID)
+ SELECT
+ t.SD_ID
+ FROM
+ TBLS t
+ WHERE
+ t.SD_ID IS NOT NULL
+ ORDER BY
+ t.SD_ID
+ ;
+ END $$
+
+/*
+ * A currently unused function to igrate the COLUMNS_V2 table
+ * to have the name COLUMNS
+ */
+CREATE PROCEDURE RENAME_TMP_COLUMNS()
+ BEGIN
+ /*DROP TABLE `COLUMNS`;*/
+ RENAME TABLE `COLUMNS_V2` TO `COLUMNS`;
+ SELECT 'Renamed COLUMNS_V2 to COLUMNS';
+ ALTER TABLE `COLUMNS`
+ DROP FOREIGN KEY `COLUMNS_V2_FK1`;
+ SELECT 'Dropped FK on Columns';
+ DROP INDEX `COLUMNS_V2_N49` ON COLUMNS;
+ SELECT 'Dropped Index on Columns';
+ CREATE INDEX `COLUMNS_N49` ON COLUMNS
+ (CD_ID)
+ ;
+ SELECT 'Added index on Columns';
+ ALTER TABLE COLUMNS_
+ ADD CONSTRAINT `COLUMNS_FK1`
+ FOREIGN KEY (`CD_ID`) REFERENCES `CDS` (`CD_ID`)
+ ;
+ SELECT 'Added FK on Columns';
+ END $$
+
+/*
+ * Main call for migration
+ */
+CREATE PROCEDURE MIGRATE()
+ BEGIN
+ call PRE_MIGRATE();
+ SELECT 'Completed pre migration';
+ call MIGRATE_TABLES();
+ SELECT 'Completed migrating tables';
+ call MIGRATE_PARTITIONS();
+ SELECT 'Completed migrating partitions';
+ /* Migrate indexes? */
+ END $$
+
+DELIMITER ;
Modified: hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java?rev=1155573&r1=1155572&r2=1155573&view=diff
==============================================================================
--- hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java (original)
+++ hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java Tue Aug 9 22:04:20 2011
@@ -3465,7 +3465,6 @@ public class HiveMetaStore extends Thrif
*/
public static void main(String[] args) throws Throwable {
HiveMetastoreCli cli = new HiveMetastoreCli();
-
cli.parse(args);
// NOTE: It is critical to do this prior to initializing log4j, otherwise
Modified: hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java?rev=1155573&r1=1155572&r2=1155573&view=diff
==============================================================================
--- hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java (original)
+++ hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java Tue Aug 9 22:04:20 2011
@@ -79,6 +79,7 @@ import org.apache.hadoop.hive.metastore.
import org.apache.hadoop.hive.metastore.api.UnknownDBException;
import org.apache.hadoop.hive.metastore.api.UnknownPartitionException;
import org.apache.hadoop.hive.metastore.api.UnknownTableException;
+import org.apache.hadoop.hive.metastore.model.MColumnDescriptor;
import org.apache.hadoop.hive.metastore.model.MDBPrivilege;
import org.apache.hadoop.hive.metastore.model.MDatabase;
import org.apache.hadoop.hive.metastore.model.MFieldSchema;
@@ -701,7 +702,17 @@ public class ObjectStore implements RawS
if (partColGrants != null && partColGrants.size() > 0) {
pm.deletePersistentAll(partColGrants);
}
- pm.deletePersistentAll(listMPartitions(dbName, tableName, -1));
+
+ // call dropPartition on each of the table's partitions to follow the
+ // procedure for cleanly dropping partitions.
+ List<MPartition> partsToDelete = listMPartitions(dbName, tableName, -1);
+ if (partsToDelete != null) {
+ for (MPartition mpart : partsToDelete) {
+ dropPartitionCommon(mpart);
+ }
+ }
+
+ preDropStorageDescriptor(tbl.getSd());
// then remove the table
pm.deletePersistentAll(tbl);
}
@@ -885,6 +896,7 @@ public class ObjectStore implements RawS
}
}
+ // A new table is always created with a new column descriptor
return new MTable(tbl.getTableName().toLowerCase(), mdb,
convertToMStorageDescriptor(tbl.getSd()), tbl.getOwner(), tbl
.getCreateTime(), tbl.getLastAccessTime(), tbl.getRetention(),
@@ -955,6 +967,18 @@ public class ObjectStore implements RawS
.getParameters());
}
+ /**
+ * Given a list of model field schemas, create a new model column descriptor.
+ * @param cols the columns the column descriptor contains
+ * @return a new column descriptor db-backed object
+ */
+ private MColumnDescriptor createNewMColumnDescriptor(List<MFieldSchema> cols) {
+ if (cols == null) {
+ return null;
+ }
+ return new MColumnDescriptor(cols);
+ }
+
// MSD and SD should be same objects. Not sure how to make then same right now
// MSerdeInfo *& SerdeInfo should be same as well
private StorageDescriptor convertToStorageDescriptor(MStorageDescriptor msd,
@@ -963,7 +987,8 @@ public class ObjectStore implements RawS
if (msd == null) {
return null;
}
- return new StorageDescriptor(noFS ? null: convertToFieldSchemas(msd.getCols()),
+ List<MFieldSchema> mFieldSchemas = msd.getCD() == null ? null : msd.getCD().getCols();
+ return new StorageDescriptor(noFS ? null: convertToFieldSchemas(mFieldSchemas),
msd.getLocation(), msd.getInputFormat(), msd.getOutputFormat(), msd
.isCompressed(), msd.getNumBuckets(), converToSerDeInfo(msd
.getSerDeInfo()), msd.getBucketCols(), convertToOrders(msd
@@ -975,12 +1000,37 @@ public class ObjectStore implements RawS
return convertToStorageDescriptor(msd, false);
}
+ /**
+ * Converts a storage descriptor to a db-backed storage descriptor. Creates a
+ * new db-backed column descriptor object for this SD.
+ * @param sd the storage descriptor to wrap in a db-backed object
+ * @return the storage descriptor db-backed object
+ * @throws MetaException
+ */
private MStorageDescriptor convertToMStorageDescriptor(StorageDescriptor sd)
throws MetaException {
if (sd == null) {
return null;
}
- return new MStorageDescriptor(convertToMFieldSchemas(sd.getCols()), sd
+ MColumnDescriptor mcd = createNewMColumnDescriptor(convertToMFieldSchemas(sd.getCols()));
+ return convertToMStorageDescriptor(sd, mcd);
+ }
+
+ /**
+ * Converts a storage descriptor to a db-backed storage descriptor. It points the
+ * storage descriptor's column descriptor to the one passed as an argument,
+ * so it does not create a new mcolumn descriptor object.
+ * @param sd the storage descriptor to wrap in a db-backed object
+ * @param mcd the db-backed column descriptor
+ * @return the db-backed storage descriptor object
+ * @throws MetaException
+ */
+ private MStorageDescriptor convertToMStorageDescriptor(StorageDescriptor sd,
+ MColumnDescriptor mcd) throws MetaException {
+ if (sd == null) {
+ return null;
+ }
+ return new MStorageDescriptor(mcd, sd
.getLocation(), sd.getInputFormat(), sd.getOutputFormat(), sd
.isCompressed(), sd.getNumBuckets(), converToMSerDeInfo(sd
.getSerdeInfo()), sd.getBucketCols(),
@@ -1002,7 +1052,7 @@ public class ObjectStore implements RawS
part.getDbName(), part.getTableName());
}
openTransaction();
- MPartition mpart = convertToMPart(part);
+ MPartition mpart = convertToMPart(part, true);
pm.makePersistent(mpart);
int now = (int)(System.currentTimeMillis()/1000);
@@ -1086,7 +1136,18 @@ public class ObjectStore implements RawS
return mpart;
}
- private MPartition convertToMPart(Partition part)
+ /**
+ * Convert a Partition object into an MPartition, which is an object backed by the db
+ * If the Partition's set of columns is the same as the parent table's AND useTableCD
+ * is true, then this partition's storage descriptor's column descriptor will point
+ * to the same one as the table's storage descriptor.
+ * @param part the partition to convert
+ * @param useTableCD whether to try to use the parent table's column descriptor.
+ * @return the model partition object
+ * @throws InvalidObjectException
+ * @throws MetaException
+ */
+ private MPartition convertToMPart(Partition part, boolean useTableCD)
throws InvalidObjectException, MetaException {
if (part == null) {
return null;
@@ -1096,10 +1157,26 @@ public class ObjectStore implements RawS
throw new InvalidObjectException(
"Partition doesn't have a valid table or database name");
}
+
+ // If this partition's set of columns is the same as the parent table's,
+ // use the parent table's, so we do not create a duplicate column descriptor,
+ // thereby saving space
+ MStorageDescriptor msd;
+ if (useTableCD &&
+ mt.getSd() != null && mt.getSd().getCD() != null &&
+ mt.getSd().getCD().getCols() != null &&
+ part.getSd() != null &&
+ convertToFieldSchemas(mt.getSd().getCD().getCols()).
+ equals(part.getSd().getCols())) {
+ msd = convertToMStorageDescriptor(part.getSd(), mt.getSd().getCD());
+ } else {
+ msd = convertToMStorageDescriptor(part.getSd());
+ }
+
return new MPartition(Warehouse.makePartName(convertToFieldSchemas(mt
.getPartitionKeys()), part.getValues()), mt, part.getValues(), part
.getCreateTime(), part.getLastAccessTime(),
- convertToMStorageDescriptor(part.getSd()), part.getParameters());
+ msd, part.getParameters());
}
private Partition convertToPart(MPartition mpart) throws MetaException {
@@ -1122,33 +1199,58 @@ public class ObjectStore implements RawS
mpart.getParameters());
}
+ @Override
public boolean dropPartition(String dbName, String tableName,
List<String> part_vals) throws MetaException {
boolean success = false;
try {
openTransaction();
MPartition part = getMPartition(dbName, tableName, part_vals);
+ dropPartitionCommon(part);
+ success = commitTransaction();
+ } finally {
+ if (!success) {
+ rollbackTransaction();
+ }
+ }
+ return success;
+ }
+
+ /**
+ * Drop an MPartition and cascade deletes (e.g., delete partition privilege grants,
+ * drop the storage descriptor cleanly, etc.)
+ * @param part - the MPartition to drop
+ * @return whether the transaction committed successfully
+ */
+ private boolean dropPartitionCommon(MPartition part) {
+ boolean success = false;
+ try {
+ openTransaction();
if (part != null) {
List<MFieldSchema> schemas = part.getTable().getPartitionKeys();
List<String> colNames = new ArrayList<String>();
for (MFieldSchema col: schemas) {
colNames.add(col.getName());
}
- String partName = FileUtils.makePartName(colNames, part_vals);
+ String partName = FileUtils.makePartName(colNames, part.getValues());
List<MPartitionPrivilege> partGrants = listPartitionGrants(
- dbName, tableName, partName);
+ part.getTable().getDatabase().getName(),
+ part.getTable().getTableName(),
+ partName);
if (partGrants != null && partGrants.size() > 0) {
pm.deletePersistentAll(partGrants);
}
List<MPartitionColumnPrivilege> partColumnGrants = listPartitionAllColumnGrants(
- dbName, tableName, partName);
+ part.getTable().getDatabase().getName(),
+ part.getTable().getTableName(),
+ partName);
if (partColumnGrants != null && partColumnGrants.size() > 0) {
pm.deletePersistentAll(partColumnGrants);
}
-
+ preDropStorageDescriptor(part.getSd());
pm.deletePersistent(part);
}
success = commitTransaction();
@@ -1740,7 +1842,9 @@ public class ObjectStore implements RawS
oldt.setTableName(newt.getTableName().toLowerCase());
oldt.setParameters(newt.getParameters());
oldt.setOwner(newt.getOwner());
- oldt.setSd(newt.getSd());
+ // Fully copy over the contents of the new SD into the old SD,
+ // so we don't create an extra SD in the metastore db that has no references.
+ fullCopyMSD(newt.getSd(), oldt.getSd());
oldt.setDatabase(newt.getDatabase());
oldt.setRetention(newt.getRetention());
oldt.setPartitionKeys(newt.getPartitionKeys());
@@ -1796,7 +1900,7 @@ public class ObjectStore implements RawS
name = name.toLowerCase();
dbname = dbname.toLowerCase();
MPartition oldp = getMPartition(dbname, name, newPart.getValues());
- MPartition newp = convertToMPart(newPart);
+ MPartition newp = convertToMPart(newPart, false);
if (oldp == null || newp == null) {
throw new InvalidObjectException("partition does not exist.");
}
@@ -1821,7 +1925,25 @@ public class ObjectStore implements RawS
private void copyMSD(MStorageDescriptor newSd, MStorageDescriptor oldSd) {
oldSd.setLocation(newSd.getLocation());
- oldSd.setCols(newSd.getCols());
+ MColumnDescriptor oldCD = oldSd.getCD();
+ // If the columns of the old column descriptor != the columns of the new one,
+ // then change the old storage descriptor's column descriptor.
+ // Convert the MFieldSchema's to their thrift object counterparts, because we maintain
+ // datastore identity (i.e., identity of the model objects are managed by JDO,
+ // not the application).
+ if (!(oldSd != null && oldSd.getCD() != null &&
+ oldSd.getCD().getCols() != null &&
+ newSd != null && newSd.getCD() != null &&
+ newSd.getCD().getCols() != null &&
+ convertToFieldSchemas(newSd.getCD().getCols()).
+ equals(convertToFieldSchemas(oldSd.getCD().getCols()))
+ )) {
+ oldSd.setCD(newSd.getCD());
+ }
+
+ //If oldCd does not have any more references, then we should delete it
+ // from the backend db
+ removeUnusedColumnDescriptor(oldCD);
oldSd.setBucketCols(newSd.getBucketCols());
oldSd.setCompressed(newSd.isCompressed());
oldSd.setInputFormat(newSd.getInputFormat());
@@ -1833,6 +1955,92 @@ public class ObjectStore implements RawS
oldSd.getSerDeInfo().setParameters(newSd.getSerDeInfo().getParameters());
}
+ /**
+ * copy over all fields from newSd to oldSd
+ * @param newSd the new storage descriptor
+ * @param oldSd the old descriptor that gets copied over
+ */
+ private void fullCopyMSD(MStorageDescriptor newSd, MStorageDescriptor oldSd) {
+ copyMSD(newSd, oldSd);
+ oldSd.setSortCols(newSd.getSortCols());
+ oldSd.setParameters(newSd.getParameters());
+ }
+
+ /**
+ * Checks if a column descriptor has any remaining references by storage descriptors
+ * in the db. If it does not, then delete the CD. If it does, then do nothing.
+ * @param oldCD the column descriptor to delete if it is no longer referenced anywhere
+ */
+ private void removeUnusedColumnDescriptor(MColumnDescriptor oldCD) {
+ if (oldCD == null) {
+ return;
+ }
+
+ boolean success = false;
+ try {
+ openTransaction();
+ LOG.debug("execute removeUnusedColumnDescriptor");
+ List<MStorageDescriptor> referencedSDs = listStorageDescriptorsWithCD(oldCD);
+ //if no other SD references this CD, we can throw it out.
+ if (referencedSDs != null && referencedSDs.isEmpty()) {
+ pm.retrieve(oldCD);
+ pm.deletePersistent(oldCD);
+ }
+ success = commitTransaction();
+ LOG.debug("successfully deleted a CD in removeUnusedColumnDescriptor");
+ } finally {
+ if (!success) {
+ rollbackTransaction();
+ }
+ }
+ }
+
+ /**
+ * Called right before an action that would drop a storage descriptor.
+ * This function makes the SD's reference to a CD null, and then deletes the CD
+ * if it no longer is referenced in the table.
+ * @param msd the storage descriptor to drop
+ */
+ private void preDropStorageDescriptor(MStorageDescriptor msd) {
+ if (msd == null || msd.getCD() == null) {
+ return;
+ }
+
+ MColumnDescriptor mcd = msd.getCD();
+ // Because there is a 1-N relationship between CDs and SDs,
+ // we must set the SD's CD to null first before dropping the storage descriptor
+ // to satisfy foriegn key constraints.
+ msd.setCD(null);
+ removeUnusedColumnDescriptor(mcd);
+ }
+
+ /**
+ * Get a list of storage descriptors that reference a particular Column Descriptor
+ * @param oldCD the column descriptor to get storage descriptors for
+ * @return a list of storage descriptors
+ */
+ private List<MStorageDescriptor> listStorageDescriptorsWithCD(MColumnDescriptor oldCD) {
+ boolean success = false;
+ List<MStorageDescriptor> sds = null;
+ try {
+ openTransaction();
+ LOG.debug("Executing listStorageDescriptorsWithCD");
+ Query query = pm.newQuery(MStorageDescriptor.class,
+ "this.cd == inCD");
+ query.declareParameters("MColumnDescriptor inCD");
+ sds = (List<MStorageDescriptor>) query.execute(oldCD);
+ LOG.debug("Done executing query for listStorageDescriptorsWithCD");
+ pm.retrieveAll(sds);
+ success = commitTransaction();
+ LOG.debug("Done retrieving all objects for listStorageDescriptorsWithCD");
+ } finally {
+ if (!success) {
+ rollbackTransaction();
+ }
+ }
+ return sds;
+ }
+
@Override
public boolean addIndex(Index index) throws InvalidObjectException,
MetaException {
Added: hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MColumnDescriptor.java
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MColumnDescriptor.java?rev=1155573&view=auto
==============================================================================
--- hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MColumnDescriptor.java (added)
+++ hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MColumnDescriptor.java Tue Aug 9 22:04:20 2011
@@ -0,0 +1,51 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ *
+ */
+package org.apache.hadoop.hive.metastore.model;
+
+import java.util.List;
+
+/**
+ *
+ * MColumnDescriptor.
+ * A wrapper around a list of columns.
+ */
+public class MColumnDescriptor {
+ private List<MFieldSchema> cols;
+
+ public MColumnDescriptor() {}
+
+ /**
+ *
+ * @param cols
+ */
+ public MColumnDescriptor(List<MFieldSchema> cols) {
+ this.cols = cols;
+ }
+
+ public List<MFieldSchema> getCols() {
+ return cols;
+ }
+
+ public void setCols(List<MFieldSchema> cols) {
+ this.cols = cols;
+ }
+}
Modified: hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MStorageDescriptor.java
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MStorageDescriptor.java?rev=1155573&r1=1155572&r2=1155573&view=diff
==============================================================================
--- hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MStorageDescriptor.java (original)
+++ hive/trunk/metastore/src/model/org/apache/hadoop/hive/metastore/model/MStorageDescriptor.java Tue Aug 9 22:04:20 2011
@@ -22,7 +22,7 @@ import java.util.List;
import java.util.Map;
public class MStorageDescriptor {
- private List<MFieldSchema> cols;
+ private MColumnDescriptor cd;
private String location;
private String inputFormat;
private String outputFormat;
@@ -32,12 +32,12 @@ public class MStorageDescriptor {
private List<String> bucketCols;
private List<MOrder> sortCols;
private Map<String, String> parameters;
-
+
public MStorageDescriptor() {}
-
+
/**
- * @param cols
+ * @param cd
* @param location
* @param inputFormat
* @param outputFormat
@@ -48,10 +48,10 @@ public class MStorageDescriptor {
* @param sortOrder
* @param parameters
*/
- public MStorageDescriptor(List<MFieldSchema> cols, String location, String inputFormat,
+ public MStorageDescriptor(MColumnDescriptor cd, String location, String inputFormat,
String outputFormat, boolean isCompressed, int numBuckets, MSerDeInfo serDeInfo,
List<String> bucketCols, List<MOrder> sortOrder, Map<String, String> parameters) {
- this.cols = cols;
+ this.cd = cd;
this.location = location;
this.inputFormat = inputFormat;
this.outputFormat = outputFormat;
@@ -163,17 +163,17 @@ public class MStorageDescriptor {
}
/**
- * @return the cols
+ * @return the column descriptor
*/
- public List<MFieldSchema> getCols() {
- return cols;
+ public MColumnDescriptor getCD() {
+ return cd;
}
/**
- * @param cols the cols to set
+ * @param cd the Column Descriptor to set
*/
- public void setCols(List<MFieldSchema> cols) {
- this.cols = cols;
+ public void setCD(MColumnDescriptor cd) {
+ this.cd = cd;
}
/**
Modified: hive/trunk/metastore/src/model/package.jdo
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/model/package.jdo?rev=1155573&r1=1155572&r2=1155573&view=diff
==============================================================================
--- hive/trunk/metastore/src/model/package.jdo (original)
+++ hive/trunk/metastore/src/model/package.jdo Tue Aug 9 22:04:20 2011
@@ -191,17 +191,17 @@
</field>
</class>
- <class name="MStorageDescriptor" identity-type="datastore" table="SDS" detachable="true">
+ <class name="MColumnDescriptor" identity-type="datastore" table="CDS" detachable="true">
<datastore-identity>
- <column name="SD_ID"/>
+ <column name="CD_ID"/>
</datastore-identity>
- <field name="cols" table="COLUMNS" >
+ <field name="cols" table="COLUMNS_V2" >
<collection element-type="MFieldSchema"/>
<join>
<primary-key name="COLUMNS_PK">
<column name="COLUMN_NAME"/>
</primary-key>
- <column name="SD_ID"/>
+ <column name="CD_ID"/>
</join>
<element>
<embedded>
@@ -214,6 +214,15 @@
</embedded>
</element>
</field>
+ </class>
+
+ <class name="MStorageDescriptor" identity-type="datastore" table="SDS" detachable="true">
+ <datastore-identity>
+ <column name="SD_ID"/>
+ </datastore-identity>
+ <field name="cd">
+ <column name="CD_ID"/>
+ </field>
<field name="location">
<column name="LOCATION" length="4000" jdbc-type="VARCHAR"/>
</field>