You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2012/02/10 06:46:54 UTC

svn commit: r1242682 - in /hive/trunk: metastore/scripts/upgrade/ metastore/src/java/org/apache/hadoop/hive/metastore/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/

Author: namit
Date: Fri Feb 10 05:46:54 2012
New Revision: 1242682

URL: http://svn.apache.org/viewvc?rev=1242682&view=rev
Log:
HIVE-2795 View partitions do not have a storage descriptor
(Kevin Wilfong via namit)


Added:
    hive/trunk/metastore/scripts/upgrade/001-HIVE-2795.update_view_partitions.py
    hive/trunk/ql/src/test/queries/clientpositive/describe_formatted_view_partitioned.q
    hive/trunk/ql/src/test/results/clientpositive/describe_formatted_view_partitioned.q.out
Modified:
    hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java

Added: hive/trunk/metastore/scripts/upgrade/001-HIVE-2795.update_view_partitions.py
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/scripts/upgrade/001-HIVE-2795.update_view_partitions.py?rev=1242682&view=auto
==============================================================================
--- hive/trunk/metastore/scripts/upgrade/001-HIVE-2795.update_view_partitions.py (added)
+++ hive/trunk/metastore/scripts/upgrade/001-HIVE-2795.update_view_partitions.py Fri Feb 10 05:46:54 2012
@@ -0,0 +1,140 @@
+#!/usr/local/bin/python
+
+# This script, provided with a list of view partitions, drops each partition and
+# adds it back via the metastore Thrift server.  This is needed because prior
+# to HIVE-2795 view partitions were created without storage descriptors, which
+# breaks commands such as DESCRIBE FORMATTED when called on these partitions.
+# Altering a view's partition is not currently supported via the Hive CLI, and
+# it results in an exception when attempted through the metastore Thrift server
+# (due to the storage descriptor being null) so no data will be lost by dropping
+# and adding the partition.
+#
+# WARNING: dropping and adding the partition is non-atomic.  The script outputs
+#          each line of the file as it processes it.  You should pipe this
+#          output to a log file so that, if the machine fails between dropping
+#          and adding, you know which partition may not have been added.  If it
+#          has not, go to the Hive CLI and run the command
+#
+#          ALTER VIEW <view_name> ADD PARTITION (<part_spec>);
+#
+#          where view_name is the name of the view, which can be taken directly
+#          from the line in the log, and part_spec is the partition
+#          specification, which can be determined from the line in the log
+#          E.g. if the partition name is col1=a/col2=b/col3=c part_spec should
+#               be col1='a', col2='b', col3='c'
+#
+# NOTE: If any partition contains characters which are escaped, this script will
+#       not work; this includes ASCII values 1-31, 127, and the characters
+#       " # % ' * / : = ? \ { [ ]
+
+# Before running this script first execute the following query against your
+# metastore:
+#
+# SELECT name, tbl_name, part_name
+# FROM
+#   DBS d JOIN TBLS t ON d.db_id = t.db_id
+#   JOIN PARTITIONS p ON t.tbl_id = p.tbl_id
+#   WHERE t.tbl_type = "VIRTUAL_VIEW";
+#
+# Place the results of this query in a file.  The format of the file should be
+# as follows:
+#
+# db_name<sep>tbl_name<sep>part_name
+#
+# where <sep> represents a column separator (tab by default).
+#
+# Then execute this script passing in the path to the file you created, as well
+# as the metastore host, port, and timeout and the separator used in the file if
+# they differ from the defaults.
+
+# To run this script you need the Thrift Python library, as well as Hive's
+# metastore Python library, in your PYTHONPATH.  Hive's metastore Python
+# library can be found in trunk/build/dist/lib/py/
+
+from optparse import OptionGroup
+from optparse import OptionParser
+
+from thrift import Thrift
+from thrift.transport import TSocket
+from thrift.transport import TTransport
+from thrift.protocol import TBinaryProtocol
+
+from hive_metastore import ThriftHiveMetastore
+
+# Parse args
+parser = OptionParser()
+
+mandatory_options = OptionGroup(parser, "Mandatory Settings",
+                          "These must be set, they have no defaults")
+
+mandatory_options.add_option("--file", action="store", type="string", dest="file",
+                          help="file containing the list of view partitions " +
+                               "stored as db_name<sep>table_name<sep>part_name")
+
+parser.add_option_group(mandatory_options)
+
+other_options = OptionGroup(parser, "Other Options",
+                            "These options all have default values")
+
+other_options.add_option("--host", action="store", type="string", dest="host",
+                          default="localhost",
+                          help="hostname of metastore server, " +
+                               "the default is localhost")
+other_options.add_option("--port", action="store", type="string", dest="port",
+                          default="9083",
+                          help="port for metastore server, the default is 9083")
+other_options.add_option("--timeout", action="store", type="string", dest="timeout",
+                          default=None,
+                          help="timeout for connection to metastore server, " +
+                               "uses Thrift's default")
+other_options.add_option("--separator", action="store", type="string", dest="separator",
+                          default="\t",
+                          help="the separator between db_name, table_name, and " +
+                               "part_name in the file passed in, the default " +
+                               "is tab")
+
+parser.add_option_group(other_options)
+
+(options, args) = parser.parse_args()
+
+host = options.host
+port = options.port
+timeout = options.timeout
+file = options.file
+separator = options.separator
+
+# Prepare the Thrift connection to the metastore
+
+_socket = TSocket.TSocket(host, port)
+_socket.setTimeout(timeout)
+_transport = TTransport.TBufferedTransport(_socket)
+_protocol = TBinaryProtocol.TBinaryProtocol(_transport)
+
+client = ThriftHiveMetastore.Client(_protocol)
+_transport.open()
+
+# Iterate over the file of partitions
+
+partition_file=open(file,'r')
+db_name = ''
+table_name = ''
+part_name = ''
+
+for line in partition_file:
+
+    line = line.rstrip("\n\r")
+    (db_name,table_name,part_name)=line.split(separator)
+
+    print line
+
+    # Get the partition associated with this line
+
+    partition = client.get_partition_by_name(db_name, table_name, part_name)
+
+    # Drop it
+
+    client.drop_partition_by_name(db_name, table_name, part_name, 0)
+
+    # Add it back
+
+    client.add_partition(partition)

Modified: hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
URL: http://svn.apache.org/viewvc/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java?rev=1242682&r1=1242681&r2=1242682&view=diff
==============================================================================
--- hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java (original)
+++ hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java Fri Feb 10 05:46:54 2012
@@ -1522,10 +1522,12 @@ public class HiveMetaStore extends Thrif
               "Unable to add partition because table or database do not exist");
         }
 
-        String partLocationStr = null;
-        if (part.getSd() != null) {
-          partLocationStr = part.getSd().getLocation();
+        if (tbl.getTableType().equals(TableType.VIRTUAL_VIEW.toString()) && part.getSd() == null) {
+          part.setSd(tbl.getSd().deepCopy());
         }
+
+        String partLocationStr = part.getSd().getLocation();
+
         if (partLocationStr == null || partLocationStr.isEmpty()) {
           // set default location if not specified and this is
           // a physical table partition (not a view)

Added: hive/trunk/ql/src/test/queries/clientpositive/describe_formatted_view_partitioned.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/describe_formatted_view_partitioned.q?rev=1242682&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/describe_formatted_view_partitioned.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/describe_formatted_view_partitioned.q Fri Feb 10 05:46:54 2012
@@ -0,0 +1,15 @@
+DROP VIEW view_partitioned;
+
+CREATE VIEW view_partitioned
+PARTITIONED ON (value)
+AS
+SELECT key, value
+FROM src
+WHERE key=86;
+
+ALTER VIEW view_partitioned
+ADD PARTITION (value='val_86');
+
+DESCRIBE FORMATTED view_partitioned PARTITION (value='val_86');
+
+DROP VIEW view_partitioned;

Added: hive/trunk/ql/src/test/results/clientpositive/describe_formatted_view_partitioned.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/describe_formatted_view_partitioned.q.out?rev=1242682&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/describe_formatted_view_partitioned.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/describe_formatted_view_partitioned.q.out Fri Feb 10 05:46:54 2012
@@ -0,0 +1,71 @@
+PREHOOK: query: DROP VIEW view_partitioned
+PREHOOK: type: DROPVIEW
+POSTHOOK: query: DROP VIEW view_partitioned
+POSTHOOK: type: DROPVIEW
+PREHOOK: query: CREATE VIEW view_partitioned
+PARTITIONED ON (value)
+AS
+SELECT key, value
+FROM src
+WHERE key=86
+PREHOOK: type: CREATEVIEW
+#### A masked pattern was here ####
+POSTHOOK: query: CREATE VIEW view_partitioned
+PARTITIONED ON (value)
+AS
+SELECT key, value
+FROM src
+WHERE key=86
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Output: default@view_partitioned
+#### A masked pattern was here ####
+PREHOOK: query: ALTER VIEW view_partitioned
+ADD PARTITION (value='val_86')
+PREHOOK: type: ALTERTABLE_ADDPARTS
+PREHOOK: Input: default@src
+PREHOOK: Input: default@view_partitioned
+POSTHOOK: query: ALTER VIEW view_partitioned
+ADD PARTITION (value='val_86')
+POSTHOOK: type: ALTERTABLE_ADDPARTS
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@view_partitioned
+POSTHOOK: Output: default@view_partitioned@value=val_86
+PREHOOK: query: DESCRIBE FORMATTED view_partitioned PARTITION (value='val_86')
+PREHOOK: type: DESCTABLE
+POSTHOOK: query: DESCRIBE FORMATTED view_partitioned PARTITION (value='val_86')
+POSTHOOK: type: DESCTABLE
+# col_name            	data_type           	comment             
+	 	 
+key                 	string              	None                
+	 	 
+# Partition Information	 	 
+# col_name            	data_type           	comment             
+	 	 
+value               	string              	None                
+	 	 
+# Detailed Partition Information	 	 
+Partition Value:    	[val_86]            	 
+Database:           	default             	 
+Table:              	view_partitioned    	 
+#### A masked pattern was here ####
+Protect Mode:       	None                	 
+#### A masked pattern was here ####
+Partition Parameters:	 	 
+#### A masked pattern was here ####
+	 	 
+# Storage Information	 	 
+SerDe Library:      	null                	 
+InputFormat:        	org.apache.hadoop.mapred.SequenceFileInputFormat	 
+OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat	 
+Compressed:         	No                  	 
+Num Buckets:        	-1                  	 
+Bucket Columns:     	[]                  	 
+Sort Columns:       	[]                  	 
+PREHOOK: query: DROP VIEW view_partitioned
+PREHOOK: type: DROPVIEW
+PREHOOK: Input: default@view_partitioned
+PREHOOK: Output: default@view_partitioned
+POSTHOOK: query: DROP VIEW view_partitioned
+POSTHOOK: type: DROPVIEW
+POSTHOOK: Input: default@view_partitioned
+POSTHOOK: Output: default@view_partitioned