You are viewing a plain text version of this content. The canonical link for it is here.
Posted to common-commits@hadoop.apache.org by dh...@apache.org on 2008/09/17 22:13:01 UTC

svn commit: r696427 [1/4] - in /hadoop/core/trunk: ./ src/contrib/hive/ src/contrib/hive/conf/ src/contrib/hive/metastore/if/ src/contrib/hive/metastore/src/gen-py/hive_metastore/ src/contrib/hive/metastore/src/java/org/apache/hadoop/hive/metastore/

Author: dhruba
Date: Wed Sep 17 13:13:00 2008
New Revision: 696427

URL: http://svn.apache.org/viewvc?rev=696427&view=rev
Log:
HADOOP-4087. Hive Metastore API for php and python clients.
(Prasad Chakka via dhruba)


Added:
    hadoop/core/trunk/src/contrib/hive/metastore/src/gen-py/hive_metastore/ThriftHiveMetastore-remote
    hadoop/core/trunk/src/contrib/hive/metastore/src/gen-py/hive_metastore/ThriftHiveMetastore.py
    hadoop/core/trunk/src/contrib/hive/metastore/src/gen-py/hive_metastore/ThriftMetaStore-remote
    hadoop/core/trunk/src/contrib/hive/metastore/src/gen-py/hive_metastore/ThriftMetaStore.py
Removed:
    hadoop/core/trunk/src/contrib/hive/metastore/src/gen-py/hive_metastore/ThriftHiveMetaStore-remote
    hadoop/core/trunk/src/contrib/hive/metastore/src/gen-py/hive_metastore/ThriftHiveMetaStore.py
Modified:
    hadoop/core/trunk/CHANGES.txt
    hadoop/core/trunk/src/contrib/hive/build.xml
    hadoop/core/trunk/src/contrib/hive/conf/jpox.properties
    hadoop/core/trunk/src/contrib/hive/metastore/if/hive_metastore.thrift
    hadoop/core/trunk/src/contrib/hive/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
    hadoop/core/trunk/src/contrib/hive/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java

Modified: hadoop/core/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/CHANGES.txt?rev=696427&r1=696426&r2=696427&view=diff
==============================================================================
--- hadoop/core/trunk/CHANGES.txt (original)
+++ hadoop/core/trunk/CHANGES.txt Wed Sep 17 13:13:00 2008
@@ -586,6 +586,9 @@
     HADOOP-4125. Handles Reduce cleanup tip on the web ui.
     (Amareshwari Sriramadasu via ddas)
 
+    HADOOP-4087. Hive Metastore API for php and python clients.
+    (Prasad Chakka via dhruba)
+
 Release 0.18.1 - 2008-09-17
 
   IMPROVEMENTS

Modified: hadoop/core/trunk/src/contrib/hive/build.xml
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/build.xml?rev=696427&r1=696426&r2=696427&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/build.xml (original)
+++ hadoop/core/trunk/src/contrib/hive/build.xml Wed Sep 17 13:13:00 2008
@@ -130,6 +130,8 @@
     <mkdir dir="${target.example.dir}"/>
     <mkdir dir="${target.example.dir}/files"/>
     <mkdir dir="${target.example.dir}/queries"/>
+    <mkdir dir="${target.lib.dir}/py"/>
+    <mkdir dir="${target.lib.dir}/php"/>
     <exec executable="cp" failonerror="true">
       <arg line="-p '${hive.root}/bin/hive' '${target.bin.dir}'"/>
     </exec>
@@ -137,6 +139,12 @@
     <copy file="${basedir}/conf/hive-default.xml" todir="${target.conf.dir}"/>
     <copy file="${basedir}/conf/hive-log4j.properties" todir="${target.conf.dir}"/>
     <copy file="${basedir}/conf/jpox.properties" todir="${target.conf.dir}"/>
+    <copy todir="${target.lib.dir}/php">
+      <fileset dir="${hive.root}/metastore/src/gen-php"/>
+    </copy>
+    <copy todir="${target.lib.dir}/py">
+      <fileset dir="${hive.root}/metastore/src/gen-py"/>
+    </copy>
     <copy todir="${target.lib.dir}" preservelastmodified="true" flatten="true">
       <fileset dir="${hive.root}" includes="*/*.jar, */*/*.jar" excludes="**/antlr-2*,**/antlr-3*"/>
       <fileset file="${build.dir.hive}/cli/hive_cli.jar"/>

Modified: hadoop/core/trunk/src/contrib/hive/conf/jpox.properties
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/conf/jpox.properties?rev=696427&r1=696426&r2=696427&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/conf/jpox.properties (original)
+++ hadoop/core/trunk/src/contrib/hive/conf/jpox.properties Wed Sep 17 13:13:00 2008
@@ -13,3 +13,5 @@
 javax.jdo.option.ConnectionURL=jdbc:derby:;databaseName=test_metastore_db;create=true
 javax.jdo.option.ConnectionUserName=APP
 javax.jdo.option.ConnectionPassword=mine
+org.jpox.cache.level2=true
+org.jpox.cache.level2.type=SOFT

Modified: hadoop/core/trunk/src/contrib/hive/metastore/if/hive_metastore.thrift
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/metastore/if/hive_metastore.thrift?rev=696427&r1=696426&r2=696427&view=diff
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/metastore/if/hive_metastore.thrift (original)
+++ hadoop/core/trunk/src/contrib/hive/metastore/if/hive_metastore.thrift Wed Sep 17 13:13:00 2008
@@ -88,8 +88,8 @@
 }
 
 struct FieldSchema {
-  string name,
-  string type,
+  string name, // name of the field
+  string type, // type of the field. primitive types defined above, specify list<TYPE_NAME>, map<TYPE_NAME, TYPE_NAME> for lists & maps 
   string comment
 }
 
@@ -124,20 +124,20 @@
 }
 
 struct StorageDescriptor {
-  list<FieldSchema> cols,
-  string location,
-  string inputFormat;
-  string outputFormat;
-  bool isCompressed;
+  list<FieldSchema> cols, // required (refer to types defined above)
+  string location, // defaults to <warehouse loc>/<db loc>/tablename
+  string inputFormat; // SequenceFileInputFormat (binary) or TextInputFormat or custom format
+  string outputFormat; // SequenceFileOutputFormat (binary) or IgnoreKeyTextOutputFormat or custom format
+  bool isCompressed; // compressed or not
   i32 numBuckets = 32, // this must be specified if there are any dimension columns
-  SerDeInfo serdeInfo;
-  list<string> bucketCols, //reducer grouping columns and clustering columns and bucketing columns`
-  list<Order> sortCols,
-  map<string, string> parameters
+  SerDeInfo serdeInfo; // serialization and deserialization information
+  list<string> bucketCols, // reducer grouping columns and clustering columns and bucketing columns
+  list<Order> sortCols, // sort order of the data in each bucket
+  map<string, string> parameters // any user supplied key value hash
 }
 
 struct Table {
-  string tableName,
+  string tableName, 
   string database,
   string owner,
   i32 createTime,
@@ -214,7 +214,14 @@
   list<FieldSchema> get_fields(string db_name, string table_name) throws (MetaException ouch1, UnknownTableException ouch2, UnknownDBException ouch3),
 
   // Tables
-  // create the table with the given table object in the given database
+  // create a Hive table. Following fields must be set
+  // Table.tableName
+  // Table.database (only 'default' for now until Hive QL supports databases)
+  // Table.owner (not needed, but good to have for tracking purposes)
+  // Table.sd.cols (list of field schemas)
+  // Table.sd.inputFormat ( SequenceFileInputFormat (binary like falcon tables or u_full) or TextInputFormat)
+  // Table.sd.outputFormat (SequenceFileOutputFormat (binary) or IgnoreKeyTextOutputFormat)
+  // Table.sd.serdeInfo.serializationLib (SerDe class name such as org.apache.hadoop.hive.serde.simple_meta.MetadataTypedColumnsetSerDe)
   void create_table(1:Table tbl) throws(1:AlreadyExistsException ouch1, 2:InvalidObjectException ouch2, 3:MetaException ouch3, 4:NoSuchObjectException o4)
   // drops the table and all the partitions associated with it if the table has partitions
   // delete data (including partitions) if deleteData is set to true

Added: hadoop/core/trunk/src/contrib/hive/metastore/src/gen-py/hive_metastore/ThriftHiveMetastore-remote
URL: http://svn.apache.org/viewvc/hadoop/core/trunk/src/contrib/hive/metastore/src/gen-py/hive_metastore/ThriftHiveMetastore-remote?rev=696427&view=auto
==============================================================================
--- hadoop/core/trunk/src/contrib/hive/metastore/src/gen-py/hive_metastore/ThriftHiveMetastore-remote (added)
+++ hadoop/core/trunk/src/contrib/hive/metastore/src/gen-py/hive_metastore/ThriftHiveMetastore-remote Wed Sep 17 13:13:00 2008
@@ -0,0 +1,247 @@
+#!/usr/bin/env python
+#
+# Autogenerated by Thrift
+#
+# DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
+#
+
+import sys
+import pprint
+from urlparse import urlparse
+from thrift.transport import TTransport
+from thrift.transport import TSocket
+from thrift.transport import THttpClient
+from thrift.protocol import TBinaryProtocol
+
+import ThriftHiveMetastore
+from ttypes import *
+
+if len(sys.argv) <= 1 or sys.argv[1] == '--help':  # no arguments, or --help as the first argument: print usage and exit with status 0
+  print ''
+  print 'Usage: ' + sys.argv[0] + ' [-h host:port] [-u url] [-f[ramed]] function [arg1 [arg2...]]'
+  print ''
+  print 'Functions:'  # one line per function name that the dispatch chain further down recognizes
+  print '  bool create_database(string name, string location_uri)'
+  print '  Database get_database(string name)'
+  print '  bool drop_database(string name)'
+  print '   get_databases()'
+  print '  Type get_type(string name)'
+  print '  bool create_type(Type type)'
+  print '  bool drop_type(string type)'
+  print '   get_type_all(string name)'
+  print '   get_fields(string db_name, string table_name)'
+  print '  void create_table(Table tbl)'
+  print '  void drop_table(string dbname, string name, bool deleteData)'
+  print '   get_tables(string db_name, string pattern)'
+  print '  Table get_table(string dbname, string tbl_name)'
+  print '  bool set_table_parameters(string dbname, string tbl_name,  params)'
+  print '  void alter_table(string dbname, string tbl_name, Table new_tbl)'
+  print '  void truncate_table(string db_name, string table_name, string partition)'
+  print '   cat(string db_name, string table_name, string partition, i32 high)'
+  print '  Partition add_partition(Partition new_part)'
+  print '  Partition append_partition(string db_name, string tbl_name,  part_vals)'
+  print '  bool drop_partition(string db_name, string tbl_name,  part_vals, bool deleteData)'
+  print '  Partition get_partition(string db_name, string tbl_name,  part_vals)'
+  print '   get_partitions(string db_name, string tbl_name, i16 max_parts)'
+  print '  bool set_partition_parameters(string db_name, string tbl_name, string pname,  params)'
+  print '  bool alter_partitions(StorageDescriptor sd,  parts)'
+  print '  bool create_index(Index index_def)'
+  print ''
+  sys.exit(0)
+
+pp = pprint.PrettyPrinter(indent = 2)  # used by the dispatch chain to print every RPC result
+host = 'localhost'  # connection defaults: localhost:9090 over a buffered, non-HTTP socket
+port = 9090
+uri = ''
+framed = False
+http = False
+argi = 1  # index of the next unparsed command-line argument
+# Options are recognized only in the fixed order -h, -u, -f. NOTE(review): sys.argv[argi] is read without a bounds check, so options with no function name after them raise IndexError.
+if sys.argv[argi] == '-h':  # -h host:port; the port is mandatory here because parts[1] is read unconditionally
+  parts = sys.argv[argi+1].split(':') 
+  host = parts[0]
+  port = int(parts[1])
+  argi += 2
+# -u url switches to the HTTP transport; host[:port] comes from the URL's network location, the request path from its path component.
+if sys.argv[argi] == '-u':
+  url = urlparse(sys.argv[argi+1])
+  parts = url[1].split(':') 
+  host = parts[0]
+  if len(parts) > 1:
+    port = int(parts[1])
+  else:
+    port = 80  # no explicit port in the URL
+  uri = url[2]
+  http = True
+  argi += 2
+# Only the exact strings '-f' and '-framed' select framing, although the usage text advertises -f[ramed].
+if sys.argv[argi] == '-f' or sys.argv[argi] == '-framed':
+  framed = True
+  argi += 1
+# The first remaining argument is the function name; everything after it is that function's argument list.
+cmd = sys.argv[argi]
+args = sys.argv[argi+1:]
+
+if http:  # -u was given: send requests through THttpClient to host:port at path uri
+  transport = THttpClient.THttpClient(host, port, uri)
+else:
+  socket = TSocket.TSocket(host, port)  # plain socket, wrapped below according to the framed flag
+  if framed:
+    transport = TTransport.TFramedTransport(socket)
+  else:
+    transport = TTransport.TBufferedTransport(socket)
+protocol = TBinaryProtocol.TBinaryProtocol(transport)  # binary protocol layered over whichever transport was chosen
+client = ThriftHiveMetastore.Client(protocol)
+transport.open()  # opened before dispatch; closed by the final transport.close() at the end of the script
+
+if cmd == 'create_database':  # dispatch on the function name: each branch checks the argument count, calls the client, and pretty-prints the result
+  if len(args) != 2:
+    print 'create_database requires 2 args'
+    sys.exit(1)  # NOTE(review): every arg-count failure exits here after transport.open(), with no explicit transport.close()
+  pp.pprint(client.create_database(args[0],args[1],))
+
+elif cmd == 'get_database':
+  if len(args) != 1:
+    print 'get_database requires 1 args'
+    sys.exit(1)
+  pp.pprint(client.get_database(args[0],))
+
+elif cmd == 'drop_database':
+  if len(args) != 1:
+    print 'drop_database requires 1 args'
+    sys.exit(1)
+  pp.pprint(client.drop_database(args[0],))
+
+elif cmd == 'get_databases':
+  if len(args) != 0:
+    print 'get_databases requires 0 args'
+    sys.exit(1)
+  pp.pprint(client.get_databases())
+
+elif cmd == 'get_type':
+  if len(args) != 1:
+    print 'get_type requires 1 args'
+    sys.exit(1)
+  pp.pprint(client.get_type(args[0],))
+
+elif cmd == 'create_type':
+  if len(args) != 1:
+    print 'create_type requires 1 args'
+    sys.exit(1)
+  pp.pprint(client.create_type(eval(args[0]),))  # string parameters are passed as-is; all others are built by eval() of the raw command-line text, so pass trusted input only
+
+elif cmd == 'drop_type':
+  if len(args) != 1:
+    print 'drop_type requires 1 args'
+    sys.exit(1)
+  pp.pprint(client.drop_type(args[0],))
+
+elif cmd == 'get_type_all':
+  if len(args) != 1:
+    print 'get_type_all requires 1 args'
+    sys.exit(1)
+  pp.pprint(client.get_type_all(args[0],))
+
+elif cmd == 'get_fields':
+  if len(args) != 2:
+    print 'get_fields requires 2 args'
+    sys.exit(1)
+  pp.pprint(client.get_fields(args[0],args[1],))
+
+elif cmd == 'create_table':
+  if len(args) != 1:
+    print 'create_table requires 1 args'
+    sys.exit(1)
+  pp.pprint(client.create_table(eval(args[0]),))
+
+elif cmd == 'drop_table':
+  if len(args) != 3:
+    print 'drop_table requires 3 args'
+    sys.exit(1)
+  pp.pprint(client.drop_table(args[0],args[1],eval(args[2]),))
+
+elif cmd == 'get_tables':
+  if len(args) != 2:
+    print 'get_tables requires 2 args'
+    sys.exit(1)
+  pp.pprint(client.get_tables(args[0],args[1],))
+
+elif cmd == 'get_table':
+  if len(args) != 2:
+    print 'get_table requires 2 args'
+    sys.exit(1)
+  pp.pprint(client.get_table(args[0],args[1],))
+
+elif cmd == 'set_table_parameters':
+  if len(args) != 3:
+    print 'set_table_parameters requires 3 args'
+    sys.exit(1)
+  pp.pprint(client.set_table_parameters(args[0],args[1],eval(args[2]),))
+
+elif cmd == 'alter_table':
+  if len(args) != 3:
+    print 'alter_table requires 3 args'
+    sys.exit(1)
+  pp.pprint(client.alter_table(args[0],args[1],eval(args[2]),))
+
+elif cmd == 'truncate_table':
+  if len(args) != 3:
+    print 'truncate_table requires 3 args'
+    sys.exit(1)
+  pp.pprint(client.truncate_table(args[0],args[1],args[2],))
+
+elif cmd == 'cat':
+  if len(args) != 4:
+    print 'cat requires 4 args'
+    sys.exit(1)
+  pp.pprint(client.cat(args[0],args[1],args[2],eval(args[3]),))
+
+elif cmd == 'add_partition':
+  if len(args) != 1:
+    print 'add_partition requires 1 args'
+    sys.exit(1)
+  pp.pprint(client.add_partition(eval(args[0]),))
+
+elif cmd == 'append_partition':
+  if len(args) != 3:
+    print 'append_partition requires 3 args'
+    sys.exit(1)
+  pp.pprint(client.append_partition(args[0],args[1],eval(args[2]),))
+
+elif cmd == 'drop_partition':
+  if len(args) != 4:
+    print 'drop_partition requires 4 args'
+    sys.exit(1)
+  pp.pprint(client.drop_partition(args[0],args[1],eval(args[2]),eval(args[3]),))
+
+elif cmd == 'get_partition':
+  if len(args) != 3:
+    print 'get_partition requires 3 args'
+    sys.exit(1)
+  pp.pprint(client.get_partition(args[0],args[1],eval(args[2]),))
+
+elif cmd == 'get_partitions':
+  if len(args) != 3:
+    print 'get_partitions requires 3 args'
+    sys.exit(1)
+  pp.pprint(client.get_partitions(args[0],args[1],eval(args[2]),))
+
+elif cmd == 'set_partition_parameters':
+  if len(args) != 4:
+    print 'set_partition_parameters requires 4 args'
+    sys.exit(1)
+  pp.pprint(client.set_partition_parameters(args[0],args[1],args[2],eval(args[3]),))
+
+elif cmd == 'alter_partitions':
+  if len(args) != 2:
+    print 'alter_partitions requires 2 args'
+    sys.exit(1)
+  pp.pprint(client.alter_partitions(eval(args[0]),eval(args[1]),))
+
+elif cmd == 'create_index':
+  if len(args) != 1:
+    print 'create_index requires 1 args'
+    sys.exit(1)
+  pp.pprint(client.create_index(eval(args[0]),))
+# There is no else branch: an unrecognized function name prints nothing and falls through to the close below.
+transport.close()