You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by dh...@apache.org on 2008/09/17 22:13:01 UTC
svn commit: r696427 [1/4] - in /hadoop/core/trunk: ./ src/contrib/hive/
src/contrib/hive/conf/ src/contrib/hive/metastore/if/
src/contrib/hive/metastore/src/gen-py/hive_metastore/
src/contrib/hive/metastore/src/java/org/apache/hadoop/hive/metastore/
Author: dhruba
Date: Wed Sep 17 13:13:00 2008
New Revision: 696427
URL: http://svn.apache.org/viewvc?rev=696427&view=rev
Log:
HADOOP-4087. Hive Metastore API for php and python clients.
(Prasad Chakka via dhruba)
Added:
hadoop/core/trunk/src/contrib/hive/metastore/src/gen-py/hive_metastore/ThriftHiveMetastore-remote
hadoop/core/trunk/src/contrib/hive/metastore/src/gen-py/hive_metastore/ThriftHiveMetastore.py
hadoop/core/trunk/src/contrib/hive/metastore/src/gen-py/hive_metastore/ThriftMetaStore-remote
hadoop/core/trunk/src/contrib/hive/metastore/src/gen-py/hive_metastore/ThriftMetaStore.py
Removed:
hadoop/core/trunk/src/contrib/hive/metastore/src/gen-py/hive_metastore/ThriftHiveMetaStore-remote
hadoop/core/trunk/src/contrib/hive/metastore/src/gen-py/hive_metastore/ThriftHiveMetaStore.py
Modified:
hadoop/core/trunk/CHANGES.txt
hadoop/core/trunk/src/contrib/hive/build.xml
hadoop/core/trunk/src/contrib/hive/conf/jpox.properties
hadoop/core/trunk/src/contrib/hive/metastore/if/hive_metastore.thrift
hadoop/core/trunk/src/contrib/hive/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
hadoop/core/trunk/src/contrib/hive/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
Modified: hadoop/core/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=696427&r1=696426&r2=696427&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Wed Sep 17 13:13:00 2008
@@ -586,6 +586,9 @@
HADOOP-4125. Handles Reduce cleanup tip on the web ui.
(Amareshwari Sriramadasu via ddas)
+ HADOOP-4087. Hive Metastore API for php and python clients.
+ (Prasad Chakka via dhruba)
+
Release 0.18.1 - 2008-09-17
IMPROVEMENTS
Modified: hadoop/core/trunk/src/contrib/hive/build.xml
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/build.xml?rev=696427&r1=696426&r2=696427&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/build.xml (original)
+++ hadoop/core/trunk/src/contrib/hive/build.xml Wed Sep 17 13:13:00 2008
@@ -130,6 +130,8 @@
<mkdir dir="${target.example.dir}"/>
<mkdir dir="${target.example.dir}/files"/>
<mkdir dir="${target.example.dir}/queries"/>
+ <mkdir dir="${target.lib.dir}/py"/>
+ <mkdir dir="${target.lib.dir}/php"/>
<exec executable="cp" failonerror="true">
<arg line="-p '${hive.root}/bin/hive' '${target.bin.dir}'"/>
</exec>
@@ -137,6 +139,12 @@
<copy file="${basedir}/conf/hive-default.xml" todir="${target.conf.dir}"/>
<copy file="${basedir}/conf/hive-log4j.properties" todir="${target.conf.dir}"/>
<copy file="${basedir}/conf/jpox.properties" todir="${target.conf.dir}"/>
+ <copy todir="${target.lib.dir}/php">
+ <fileset dir="${hive.root}/metastore/src/gen-php"/>
+ </copy>
+ <copy todir="${target.lib.dir}/py">
+ <fileset dir="${hive.root}/metastore/src/gen-py"/>
+ </copy>
<copy todir="${target.lib.dir}" preservelastmodified="true" flatten="true">
<fileset dir="${hive.root}" includes="*/*.jar, */*/*.jar" excludes="**/antlr-2*,**/antlr-3*"/>
<fileset file="${build.dir.hive}/cli/hive_cli.jar"/>
Modified: hadoop/core/trunk/src/contrib/hive/conf/jpox.properties
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/conf/jpox.properties?rev=696427&r1=696426&r2=696427&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/conf/jpox.properties (original)
+++ hadoop/core/trunk/src/contrib/hive/conf/jpox.properties Wed Sep 17 13:13:00 2008
@@ -13,3 +13,5 @@
javax.jdo.option.ConnectionURL=jdbc:derby:;databaseName=test_metastore_db;create=true
javax.jdo.option.ConnectionUserName=APP
javax.jdo.option.ConnectionPassword=mine
+org.jpox.cache.level2=true
+org.jpox.cache.level2.type=SOFT
Modified: hadoop/core/trunk/src/contrib/hive/metastore/if/hive_metastore.thrift
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/metastore/if/hive_metastore.thrift?rev=696427&r1=696426&r2=696427&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/metastore/if/hive_metastore.thrift (original)
+++ hadoop/core/trunk/src/contrib/hive/metastore/if/hive_metastore.thrift Wed Sep 17 13:13:00 2008
@@ -88,8 +88,8 @@
}
struct FieldSchema {
- string name,
- string type,
+ string name, // name of the field
+ string type, // type of the field: one of the primitive types defined above, or list<TYPE_NAME> / map<TYPE_NAME, TYPE_NAME> for lists & maps
string comment
}
@@ -124,20 +124,20 @@
}
struct StorageDescriptor {
- list<FieldSchema> cols,
- string location,
- string inputFormat;
- string outputFormat;
- bool isCompressed;
+ list<FieldSchema> cols, // required (refer to types defined above)
+ string location, // defaults to <warehouse loc>/<db loc>/tablename
+ string inputFormat; // SequenceFileInputFormat (binary) or TextInputFormat or custom format
+ string outputFormat; // SequenceFileOutputFormat (binary) or IgnoreKeyTextOutputFormat or custom format
+ bool isCompressed; // compressed or not
i32 numBuckets = 32, // this must be specified if there are any dimension columns
- SerDeInfo serdeInfo;
- list<string> bucketCols, //reducer grouping columns and clustering columns and bucketing columns`
- list<Order> sortCols,
- map<string, string> parameters
+ SerDeInfo serdeInfo; // serialization and deserialization information
+ list<string> bucketCols, // reducer grouping columns and clustering columns and bucketing columns
+ list<Order> sortCols, // sort order of the data in each bucket
+ map<string, string> parameters // any user supplied key value hash
}
struct Table {
- string tableName,
+ string tableName,
string database,
string owner,
i32 createTime,
@@ -214,7 +214,14 @@
list<FieldSchema> get_fields(string db_name, string table_name) throws (MetaException ouch1, UnknownTableException ouch2, UnknownDBException ouch3),
// Tables
- // create the table with the given table object in the given database
+ // create a Hive table. The following fields must be set:
+ // Table.tableName
+ // Table.database (only 'default' for now until Hive QL supports databases)
+ // Table.owner (not needed, but good to have for tracking purposes)
+ // Table.sd.cols (list of field schemas)
+ // Table.sd.inputFormat ( SequenceFileInputFormat (binary like falcon tables or u_full) or TextInputFormat)
+ // Table.sd.outputFormat (SequenceFileOutputFormat (binary) or IgnoreKeyTextOutputFormat)
+ // Table.sd.serdeInfo.serializationLib (SerDe class name such as org.apache.hadoop.hive.serde.simple_meta.MetadataTypedColumnsetSerDe)
void create_table(1:Table tbl) throws(1:AlreadyExistsException ouch1, 2:InvalidObjectException ouch2, 3:MetaException ouch3, 4:NoSuchObjectException o4)
// drops the table and all the partitions associated with it if the table has partitions
// delete data (including partitions) if deleteData is set to true
Added: hadoop/core/trunk/src/contrib/hive/metastore/src/gen-py/hive_metastore/ThriftHiveMetastore-remote
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/metastore/src/gen-py/hive_metastore/ThriftHiveMetastore-remote?rev=696427&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/metastore/src/gen-py/hive_metastore/ThriftHiveMetastore-remote (added)
+++ hadoop/core/trunk/src/contrib/hive/metastore/src/gen-py/hive_metastore/ThriftHiveMetastore-remote Wed Sep 17 13:13:00 2008
@@ -0,0 +1,247 @@
+#!/usr/bin/env python
+#
+# Autogenerated by Thrift
+#
+# DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
+#
+
+import sys
+import pprint
+from urlparse import urlparse
+from thrift.transport import TTransport
+from thrift.transport import TSocket
+from thrift.transport import THttpClient
+from thrift.protocol import TBinaryProtocol
+
+import ThriftHiveMetastore
+from ttypes import *
+
+if len(sys.argv) <= 1 or sys.argv[1] == '--help':
+ print ''
+ print 'Usage: ' + sys.argv[0] + ' [-h host:port] [-u url] [-f[ramed]] function [arg1 [arg2...]]'
+ print ''
+ print 'Functions:'
+ print ' bool create_database(string name, string location_uri)'
+ print ' Database get_database(string name)'
+ print ' bool drop_database(string name)'
+ print ' get_databases()'
+ print ' Type get_type(string name)'
+ print ' bool create_type(Type type)'
+ print ' bool drop_type(string type)'
+ print ' get_type_all(string name)'
+ print ' get_fields(string db_name, string table_name)'
+ print ' void create_table(Table tbl)'
+ print ' void drop_table(string dbname, string name, bool deleteData)'
+ print ' get_tables(string db_name, string pattern)'
+ print ' Table get_table(string dbname, string tbl_name)'
+ print ' bool set_table_parameters(string dbname, string tbl_name, params)'
+ print ' void alter_table(string dbname, string tbl_name, Table new_tbl)'
+ print ' void truncate_table(string db_name, string table_name, string partition)'
+ print ' cat(string db_name, string table_name, string partition, i32 high)'
+ print ' Partition add_partition(Partition new_part)'
+ print ' Partition append_partition(string db_name, string tbl_name, part_vals)'
+ print ' bool drop_partition(string db_name, string tbl_name, part_vals, bool deleteData)'
+ print ' Partition get_partition(string db_name, string tbl_name, part_vals)'
+ print ' get_partitions(string db_name, string tbl_name, i16 max_parts)'
+ print ' bool set_partition_parameters(string db_name, string tbl_name, string pname, params)'
+ print ' bool alter_partitions(StorageDescriptor sd, parts)'
+ print ' bool create_index(Index index_def)'
+ print ''
+ sys.exit(0)
+
+pp = pprint.PrettyPrinter(indent = 2)
+host = 'localhost'
+port = 9090
+uri = ''
+framed = False
+http = False
+argi = 1
+
+if sys.argv[argi] == '-h':
+ parts = sys.argv[argi+1].split(':')
+ host = parts[0]
+ port = int(parts[1])
+ argi += 2
+
+if sys.argv[argi] == '-u':
+ url = urlparse(sys.argv[argi+1])
+ parts = url[1].split(':')
+ host = parts[0]
+ if len(parts) > 1:
+ port = int(parts[1])
+ else:
+ port = 80
+ uri = url[2]
+ http = True
+ argi += 2
+
+if sys.argv[argi] == '-f' or sys.argv[argi] == '-framed':
+ framed = True
+ argi += 1
+
+cmd = sys.argv[argi]
+args = sys.argv[argi+1:]
+
+if http:
+ transport = THttpClient.THttpClient(host, port, uri)
+else:
+ socket = TSocket.TSocket(host, port)
+ if framed:
+ transport = TTransport.TFramedTransport(socket)
+ else:
+ transport = TTransport.TBufferedTransport(socket)
+protocol = TBinaryProtocol.TBinaryProtocol(transport)
+client = ThriftHiveMetastore.Client(protocol)
+transport.open()
+
+if cmd == 'create_database':
+ if len(args) != 2:
+ print 'create_database requires 2 args'
+ sys.exit(1)
+ pp.pprint(client.create_database(args[0],args[1],))
+
+elif cmd == 'get_database':
+ if len(args) != 1:
+ print 'get_database requires 1 args'
+ sys.exit(1)
+ pp.pprint(client.get_database(args[0],))
+
+elif cmd == 'drop_database':
+ if len(args) != 1:
+ print 'drop_database requires 1 args'
+ sys.exit(1)
+ pp.pprint(client.drop_database(args[0],))
+
+elif cmd == 'get_databases':
+ if len(args) != 0:
+ print 'get_databases requires 0 args'
+ sys.exit(1)
+ pp.pprint(client.get_databases())
+
+elif cmd == 'get_type':
+ if len(args) != 1:
+ print 'get_type requires 1 args'
+ sys.exit(1)
+ pp.pprint(client.get_type(args[0],))
+
+elif cmd == 'create_type':
+ if len(args) != 1:
+ print 'create_type requires 1 args'
+ sys.exit(1)
+ pp.pprint(client.create_type(eval(args[0]),))
+
+elif cmd == 'drop_type':
+ if len(args) != 1:
+ print 'drop_type requires 1 args'
+ sys.exit(1)
+ pp.pprint(client.drop_type(args[0],))
+
+elif cmd == 'get_type_all':
+ if len(args) != 1:
+ print 'get_type_all requires 1 args'
+ sys.exit(1)
+ pp.pprint(client.get_type_all(args[0],))
+
+elif cmd == 'get_fields':
+ if len(args) != 2:
+ print 'get_fields requires 2 args'
+ sys.exit(1)
+ pp.pprint(client.get_fields(args[0],args[1],))
+
+elif cmd == 'create_table':
+ if len(args) != 1:
+ print 'create_table requires 1 args'
+ sys.exit(1)
+ pp.pprint(client.create_table(eval(args[0]),))
+
+elif cmd == 'drop_table':
+ if len(args) != 3:
+ print 'drop_table requires 3 args'
+ sys.exit(1)
+ pp.pprint(client.drop_table(args[0],args[1],eval(args[2]),))
+
+elif cmd == 'get_tables':
+ if len(args) != 2:
+ print 'get_tables requires 2 args'
+ sys.exit(1)
+ pp.pprint(client.get_tables(args[0],args[1],))
+
+elif cmd == 'get_table':
+ if len(args) != 2:
+ print 'get_table requires 2 args'
+ sys.exit(1)
+ pp.pprint(client.get_table(args[0],args[1],))
+
+elif cmd == 'set_table_parameters':
+ if len(args) != 3:
+ print 'set_table_parameters requires 3 args'
+ sys.exit(1)
+ pp.pprint(client.set_table_parameters(args[0],args[1],eval(args[2]),))
+
+elif cmd == 'alter_table':
+ if len(args) != 3:
+ print 'alter_table requires 3 args'
+ sys.exit(1)
+ pp.pprint(client.alter_table(args[0],args[1],eval(args[2]),))
+
+elif cmd == 'truncate_table':
+ if len(args) != 3:
+ print 'truncate_table requires 3 args'
+ sys.exit(1)
+ pp.pprint(client.truncate_table(args[0],args[1],args[2],))
+
+elif cmd == 'cat':
+ if len(args) != 4:
+ print 'cat requires 4 args'
+ sys.exit(1)
+ pp.pprint(client.cat(args[0],args[1],args[2],eval(args[3]),))
+
+elif cmd == 'add_partition':
+ if len(args) != 1:
+ print 'add_partition requires 1 args'
+ sys.exit(1)
+ pp.pprint(client.add_partition(eval(args[0]),))
+
+elif cmd == 'append_partition':
+ if len(args) != 3:
+ print 'append_partition requires 3 args'
+ sys.exit(1)
+ pp.pprint(client.append_partition(args[0],args[1],eval(args[2]),))
+
+elif cmd == 'drop_partition':
+ if len(args) != 4:
+ print 'drop_partition requires 4 args'
+ sys.exit(1)
+ pp.pprint(client.drop_partition(args[0],args[1],eval(args[2]),eval(args[3]),))
+
+elif cmd == 'get_partition':
+ if len(args) != 3:
+ print 'get_partition requires 3 args'
+ sys.exit(1)
+ pp.pprint(client.get_partition(args[0],args[1],eval(args[2]),))
+
+elif cmd == 'get_partitions':
+ if len(args) != 3:
+ print 'get_partitions requires 3 args'
+ sys.exit(1)
+ pp.pprint(client.get_partitions(args[0],args[1],eval(args[2]),))
+
+elif cmd == 'set_partition_parameters':
+ if len(args) != 4:
+ print 'set_partition_parameters requires 4 args'
+ sys.exit(1)
+ pp.pprint(client.set_partition_parameters(args[0],args[1],args[2],eval(args[3]),))
+
+elif cmd == 'alter_partitions':
+ if len(args) != 2:
+ print 'alter_partitions requires 2 args'
+ sys.exit(1)
+ pp.pprint(client.alter_partitions(eval(args[0]),eval(args[1]),))
+
+elif cmd == 'create_index':
+ if len(args) != 1:
+ print 'create_index requires 1 args'
+ sys.exit(1)
+ pp.pprint(client.create_index(eval(args[0]),))
+
+transport.close()