You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kh...@apache.org on 2013/08/15 00:29:18 UTC
svn commit: r1514073 - in /hive/trunk/hcatalog/bin: hcat.py hcat_server.py
hcatcfg.py
Author: khorgath
Date: Wed Aug 14 22:29:18 2013
New Revision: 1514073
URL: http://svn.apache.org/r1514073
Log:
HIVE-5013 : [HCatalog] Create hcat.py, hcat_server.py to make HCatalog work on Windows (Daniel Dai via Sushanth Sowmyan)
Added:
hive/trunk/hcatalog/bin/hcat.py
hive/trunk/hcatalog/bin/hcat_server.py
hive/trunk/hcatalog/bin/hcatcfg.py
Added: hive/trunk/hcatalog/bin/hcat.py
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/bin/hcat.py?rev=1514073&view=auto
==============================================================================
--- hive/trunk/hcatalog/bin/hcat.py (added)
+++ hive/trunk/hcatalog/bin/hcat.py Wed Aug 14 22:29:18 2013
@@ -0,0 +1,154 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Resolve our absolute path
+# resolve links - sys.argv[0] may be a softlink
+
+import os
+import sys
+import glob
+import subprocess
+
+# Command-line launcher for the HCatalog CLI.  It assembles HADOOP_CLASSPATH
+# and HADOOP_OPTS in the environment and then runs
+# org.apache.hcatalog.cli.HCatCli through the hadoop launcher script.
+
+# Determine our absolute path, resolving any symbolic links
+this = os.path.realpath(sys.argv[0])
+bindir = os.path.dirname(this) + os.path.sep
+
+# Add the libexec directory to our search path so we can find the hcatcfg
+# module
+sys.path.append(os.path.join(bindir, os.path.pardir, "libexec"))
+import hcatcfg
+
+# Find our config directory and Hadoop
+hcatcfg.findCfgFile()
+hcatcfg.findHadoop()
+
+# See if any debug flags have been turned on.  A flag that is present is
+# stripped from sys.argv so it is not forwarded to HCatCli.
+debug = 0
+if '-secretDebugCmd' in sys.argv:
+    sys.argv.remove('-secretDebugCmd')
+    debug = 1
+
+dumpClasspath = 0
+if '-classpath' in sys.argv:
+    sys.argv.remove('-classpath')
+    dumpClasspath = 1
+
+# find HIVE installation directory
+hcatcfg.findHive()
+if 'HIVE_HOME' not in os.environ:
+    sys.exit("Hive not found. Set HIVE_HOME to directory containing Hive.")
+
+if 'HIVE_LIB_DIR' not in os.environ:
+    sys.exit("Cannot find lib dir within HIVE_HOME %s" % (os.environ['HIVE_HOME'] + os.path.sep + "lib"))
+
+if 'HIVE_CONF_DIR' not in os.environ:
+    sys.exit("Cannot find conf dir within HIVE_HOME %s" % (os.environ['HIVE_HOME'] + os.path.sep + "conf"))
+
+##### jars addition
+# find the hcatalog jar and add it to hadoop classpath
+hcatPrefix = hcatcfg.findHCatPrefix(bindir)
+
+hcatJarPattern = os.path.join(hcatPrefix, 'share', 'hcatalog', 'hcatalog-core-*.jar')
+hcatJars = glob.glob(hcatJarPattern)
+
+if len(hcatJars) > 1:
+    sys.exit("Found more than one hcatalog jar in the prefix path")
+
+if not hcatJars:
+    sys.exit("HCatalog jar not found in directory %s" % (hcatJarPattern))
+
+# The class path is collected in a list and joined once.  A pre-existing
+# HADOOP_CLASSPATH is kept in front; an unset or empty one is skipped so the
+# result does not begin with a path separator (an empty class path entry is
+# generally read by Java as the current directory).
+classpathEntries = []
+if os.environ.get('HADOOP_CLASSPATH'):
+    classpathEntries.append(os.environ['HADOOP_CLASSPATH'])
+
+classpathEntries.append(hcatJars[0])
+# done adding the hcatalog jar to the hadoop classpath
+
+# add all the other jars (the '*' is left unexpanded in the class path entry)
+hcatLibJarFiles = os.path.join(hcatPrefix, 'share', 'hcatalog', 'lib', '*')
+classpathEntries.append(hcatLibJarFiles)
+
+# adding hive jars
+hiveJars = os.path.join(os.environ['HIVE_LIB_DIR'], '*')
+classpathEntries.append(hiveJars)
+
+##### done with addition of jars
+
+
+##### add conf dirs to the classpath
+
+# add the hive conf dir and if exists hbase conf dir
+classpathEntries.append(os.environ['HIVE_CONF_DIR'])
+
+# if the hbase conf dir is present in the environment, add it.
+# there are no checks to see if that path exists
+# FIXME add check - original shell script does not do much if the path
+# does not exist either
+if os.environ.get('HBASE_CONF_DIR'):
+    classpathEntries.append(os.environ['HBASE_CONF_DIR'])
+
+os.environ['HADOOP_CLASSPATH'] = os.pathsep.join(classpathEntries)
+
+##### done with adding conf dirs to the classpath
+
+
+sys.stdout.flush()
+
+if os.name == "posix":
+    hadoopcmd = "hadoop"
+else:
+    hadoopcmd = "hadoop.cmd"
+
+# log under the Hive log dir but use a separate log file for HCat logs
+hiveLogDir = os.path.join(os.environ['HIVE_HOME'], "logs")
+os.environ['HADOOP_OPTS'] = (os.environ.get('HADOOP_OPTS', '')
+                             + " -Dhive.log.file=hcat.log"
+                             + " -Dhive.log.dir=" + hiveLogDir)
+
+##### Uncomment to debug log4j configuration
+#os.environ['HADOOP_OPTS'] += " -Dlog4j.debug"
+
+cmdLine = os.path.join(os.environ['HADOOP_PREFIX'], "bin", hadoopcmd)
+cmd = [cmdLine, "jar", hcatJars[0], "org.apache.hcatalog.cli.HCatCli"] + sys.argv[1:]
+if os.name != "posix":
+    # the .cmd launcher is run through the shell's "call" (see shell=True below)
+    cmd.insert(0, "call")
+
+
+# print(...) with a single argument behaves the same on Python 2 and Python 3.
+if debug == 1:
+    print("Would run:")
+    print("exec " + str(cmd))
+    print(" with HADOOP_CLASSPATH set to %s" % (os.environ['HADOOP_CLASSPATH']))
+    # HADOOP_OPTS is always set by this point, so no guard is needed
+    print(" and HADOOP_OPTS set to %s" % (os.environ['HADOOP_OPTS']))
+elif dumpClasspath == 1:
+    print(os.environ['HADOOP_CLASSPATH'])
+else:
+    if os.name == "posix":
+        retval = subprocess.call(cmd)
+    else:
+        retval = subprocess.call(cmd, stdin=None, stdout=None, stderr=None, shell=True)
+    # NOTE(review): this only changes this process's own environment, and the
+    # process exits on the next line - presumably a leftover; confirm.
+    os.environ['errorlevel'] = str(retval)
+    sys.exit(retval)
+
Added: hive/trunk/hcatalog/bin/hcat_server.py
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/bin/hcat_server.py?rev=1514073&view=auto
==============================================================================
--- hive/trunk/hcatalog/bin/hcat_server.py (added)
+++ hive/trunk/hcatalog/bin/hcat_server.py Wed Aug 14 22:29:18 2013
@@ -0,0 +1,165 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import os
+import subprocess
+import time
+import glob
+
+from time import strftime
+
+# Seconds start_hcat() sleeps after launching the metastore before it checks
+# whether the process came up.
+sleepTime = 3
+def print_usage():
+    """Print the command-line synopsis of this script to standard output."""
+    # Each call passes exactly one argument, so the output is the same whether
+    # print is the Python 2 statement or the Python 3 function.
+    print("Usage: %s [--config confdir] COMMAND" % (sys.argv[0]))
+    print(" start Start HCatalog Server")
+    print(" stop Stop HCatalog Server")
+
+def _read_pid(pidFile):
+    """Return the last pid stored in pidFile, or None if it cannot be read.
+
+    The file is expected to hold whitespace-separated integers; a missing
+    file, an empty file or any non-integer token yields None.
+    """
+    try:
+        with open(pidFile, 'r') as pidFileDesc:
+            words = pidFileDesc.read().split()
+    except IOError:
+        return None
+    try:
+        pids = [int(word) for word in words]
+    except ValueError:
+        return None
+    if not pids:
+        return None
+    return pids[-1]
+
+
+def start_hcat():
+    """Launch the Hive metastore service and record its pid.
+
+    Reads HCAT_LOG_DIR, HIVE_HOME and HCAT_PREFIX (and, if present, DBROOT,
+    AUX_CLASSPATH and HADOOP_OPTS) from the environment; sets HIVE_SITE_XML,
+    AUX_CLASSPATH, HADOOP_OPTS and HADOOP_HEAPSIZE for the child process.
+    The child's stdout/stderr go to hcat.out / hcat.err and the pid is written
+    to hcat.pid, all under HCAT_LOG_DIR.
+
+    Exits via sys.exit() with a message when a server already appears to be
+    running, when HIVE_HOME or hive-site.xml is missing, or when the metastore
+    is not alive after sleepTime seconds.
+    """
+    # back ground the metastore service and record the pid
+    pidFile = os.path.join(os.environ['HCAT_LOG_DIR'], 'hcat.pid')
+
+    # check if service is already running, if so exit.  The sys.exit() call is
+    # deliberately outside the try block: SystemExit must not be swallowed by
+    # the handler that deals with a stale pid.
+    # NOTE(review): on Windows os.kill() with a signal other than
+    # CTRL_C_EVENT/CTRL_BREAK_EVENT terminates the target process, so this
+    # probe is only a harmless liveness check on POSIX - confirm.
+    oldPid = _read_pid(pidFile)
+    if oldPid is not None and oldPid > 0:
+        try:
+            os.kill(oldPid, 0)
+        except OSError:
+            pass  # stale pid file: no such process, carry on and start
+        else:
+            sys.exit("HCatalog server appears to be running. If you are sure it is not remove %s and re-run this script" % (pidFile))
+
+    if 'HIVE_HOME' not in os.environ:
+        sys.exit("Hive not found. Set HIVE_HOME to directory containing Hive.")
+
+    os.environ['HIVE_SITE_XML'] = os.path.join(os.environ['HIVE_HOME'], 'conf', 'hive-site.xml')
+    if not os.path.exists(os.environ['HIVE_SITE_XML']):
+        sys.exit("Missing hive-site.xml, expected at %s" % (os.environ['HIVE_SITE_XML']))
+
+    # The commented-out shell snippet that used to sit here extracted
+    # hive.metastore.warehouse.dir from hive-site.xml and appended it to
+    # HADOOP_OPTS; that step is not performed by this script.
+
+    # Build AUX_CLASSPATH as a list and join once so that an unset or empty
+    # starting value does not leave a leading path separator behind.
+    auxEntries = []
+    if os.environ.get('AUX_CLASSPATH'):
+        auxEntries.append(os.environ['AUX_CLASSPATH'])
+
+    # add in hive-site.xml to classpath
+    auxEntries.append(os.path.dirname(os.environ['HIVE_SITE_XML']))
+
+    # add jars from db connectivity dir - be careful to not point to something like /lib
+    if 'DBROOT' in os.environ:
+        auxEntries.extend(glob.glob(os.path.join(os.environ['DBROOT'], '*.jar')))
+
+    auxEntries.extend(glob.glob(os.path.join(os.environ['HCAT_PREFIX'], 'share', 'hcatalog', 'lib', '*.jar')))
+    auxEntries.extend(glob.glob(os.path.join(os.environ['HCAT_PREFIX'], 'share', 'hcatalog', '*.jar')))
+    os.environ['AUX_CLASSPATH'] = os.pathsep.join(auxEntries)
+
+    if 'HADOOP_OPTS' not in os.environ:
+        os.environ['HADOOP_OPTS'] = ''
+
+    os.environ['HADOOP_OPTS'] += " -server -XX:+UseConcMarkSweepGC -XX:ErrorFile=" + os.path.join(os.environ['HCAT_LOG_DIR'], 'hcat_err_pid%p.log') + " -Xloggc:" + os.path.join(os.environ['HCAT_LOG_DIR'], 'hcat_gc.log-') + strftime("%Y%m%d%H%M") + " -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps"
+    os.environ['HADOOP_HEAPSIZE'] = '2048' # 8G is better if you have it
+
+    if os.name == "posix":
+        hivecmd = "hive"
+    else:
+        hivecmd = "hive.cmd"
+
+    command = os.path.join(os.environ['HIVE_HOME'], "bin", hivecmd)
+    outFile = os.path.join(os.environ['HCAT_LOG_DIR'], "hcat.out")
+    errFile = os.path.join(os.environ['HCAT_LOG_DIR'], "hcat.err")
+    windowsTmpFile = os.path.join(os.environ['HCAT_LOG_DIR'], "windows.tmp")
+
+    # The child gets its own copies of the two log handles, so ours can be
+    # closed as soon as it has been spawned.
+    outfd = open(outFile, 'w')
+    try:
+        errfd = open(errFile, 'w')
+        try:
+            child = subprocess.Popen([command, "--service", "metastore"], stdout=outfd, stderr=errfd)
+        finally:
+            errfd.close()
+    finally:
+        outfd.close()
+
+    pid = child.pid
+    print("Started metastore server init, testing if initialized correctly...")
+    time.sleep(sleepTime)
+    try:
+        if os.name == "posix":
+            # poll() notices a child that has already exited; os.kill(pid, 0)
+            # would still succeed for an exited child that was never waited on.
+            if child.poll() is not None:
+                raise Exception("metastore process exited with status %s" % (child.returncode))
+        else:
+            # On Windows the pid is taken from the first "jps" line that
+            # mentions HiveMetaStore rather than from the launcher process.
+            ret = os.system("jps | find /I \"HiveMetaStore\" > " + windowsTmpFile + "")
+            if ret != 0:
+                raise Exception("error starting process")
+            with open(windowsTmpFile, 'r') as windowsTmpFd:
+                pid = int(windowsTmpFd.readline().split(" ")[0])
+        with open(pidFile, 'w') as pidFileDesc:
+            pidFileDesc.write(str(pid))
+        print("Metastore initialized successfully")
+    except Exception as inst:
+        print(inst)
+        sys.exit("Metastore startup failed, see %s" % (errFile))
+
+    return
+
+def stop_hcat():
+    """Stop the metastore whose pid is recorded in $HCAT_LOG_DIR/hcat.pid.
+
+    The process is first sent signal 6 and, if that call fails, signal 9
+    (SIGABRT and SIGKILL respectively on Linux).  Exits via sys.exit() with
+    "Failed to stop metastore server" when no usable pid can be read or when
+    both signals fail.
+    """
+    pidFile = os.path.join(os.environ['HCAT_LOG_DIR'], 'hcat.pid')
+
+    pid = None
+    try:
+        with open(pidFile, 'r') as pidFileDesc:
+            for line in pidFileDesc:
+                words = line.split()
+                if words:
+                    pid = int(words[0])
+    except (IOError, ValueError):
+        pid = None
+
+    # Without a positive pid there is nothing safe to signal: os.kill() with
+    # pid 0 addresses the caller's whole process group (this script
+    # included), so refuse instead of falling through to the kill calls.
+    if pid is None or pid <= 0:
+        sys.exit("Failed to stop metastore server")
+
+    try:
+        os.kill(pid, 6)
+    except OSError:
+        try:
+            os.kill(pid, 9)
+        except OSError:
+            sys.exit("Failed to stop metastore server")
+
+    return
+
+if __name__ == "__main__":
+
+    # Determine our absolute path, resolving any symbolic links
+    this = os.path.realpath(sys.argv[0])
+    bindir = os.path.dirname(this) + os.path.sep
+
+    import hcatcfg
+    hcatLogDir = hcatcfg.getHCatLogDir(bindir)
+    # findHCatPrefix() exports HCAT_PREFIX, which start_hcat() reads
+    hcatcfg.findHCatPrefix(bindir)
+    os.environ['HCAT_LOG_DIR'] = hcatLogDir
+
+    if len(sys.argv) == 1:
+        print_usage()
+        sys.exit()
+
+    command = sys.argv[1]
+    if command == 'start':
+        start_hcat()
+    elif command == 'stop':
+        stop_hcat()
+    else:
+        # Anything else used to fall through to stop_hcat(), so a typo could
+        # take the server down.  Show the usage text and fail instead.
+        # NOTE(review): the usage text advertises "--config confdir", but no
+        # option parsing is visible in this script - confirm whether it
+        # should be handled here.
+        print_usage()
+        sys.exit(1)
Added: hive/trunk/hcatalog/bin/hcatcfg.py
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/bin/hcatcfg.py?rev=1514073&view=auto
==============================================================================
--- hive/trunk/hcatalog/bin/hcatcfg.py (added)
+++ hive/trunk/hcatalog/bin/hcatcfg.py Wed Aug 14 22:29:18 2013
@@ -0,0 +1,86 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Helpers shared by the HCatalog launcher scripts for locating the HCatalog,
+# Hadoop and Hive installations.
+
+import os.path
+import sys
+
+# Find the config file
+def findCfgFile():
+    """Export HCAT_CONF_DIR unless it is already present in the environment.
+
+    The default is <HCAT_PREFIX>/etc/hcatalog when HCAT_PREFIX is set and that
+    path exists, and /etc/hcatalog (built with os.path.sep) otherwise.
+    """
+    sep = os.path.sep
+    # start from the absolute fallback and upgrade to the prefixed location
+    confDir = sep.join(["", "etc", "hcatalog"])
+    if 'HCAT_PREFIX' in os.environ:
+        prefixedDir = sep.join([os.environ['HCAT_PREFIX'], "etc", "hcatalog"])
+        if os.path.exists(prefixedDir):
+            confDir = prefixedDir
+    # a value supplied by the caller always wins
+    os.environ.setdefault('HCAT_CONF_DIR', confDir)
+
+def findHadoop():
+    """Export HADOOP_PREFIX, or exit if no Hadoop installation is found.
+
+    HADOOP_HOME and then HCAT_PREFIX are tried in that order; the first one
+    that is set and contains bin/hadoop becomes HADOOP_PREFIX.  If neither
+    qualifies, an existing HADOOP_PREFIX is accepted as long as it contains
+    bin/hadoop; otherwise the process exits with an error message.
+    """
+    launcher = os.path.sep.join(["bin", "hadoop"])
+
+    def hasLauncher(envVar):
+        # True when envVar is set and <value>/bin/hadoop exists
+        return envVar in os.environ and \
+            os.path.exists(os.environ[envVar] + os.path.sep + launcher)
+
+    for candidate in ('HADOOP_HOME', 'HCAT_PREFIX'):
+        if hasLauncher(candidate):
+            os.environ['HADOOP_PREFIX'] = os.environ[candidate]
+            return
+
+    if not hasLauncher('HADOOP_PREFIX'):
+        sys.exit("Hadoop not found. Set HADOOP_HOME to the directory containing Hadoop.")
+
+def concatPath(x, y):
+    """Return x and y joined by exactly one os.path.sep, with no normalisation."""
+    return os.path.sep.join((x, y))
+
+def buildPath(pathElements):
+    """Join pathElements into a relative path using os.path.sep.
+
+    Elements are concatenated verbatim (no normalisation and no special
+    handling of absolute elements).  A single element is returned unchanged
+    and an empty sequence yields the empty string.
+    """
+    # str.join gives the same result as folding the elements pairwise with a
+    # separator, and does not depend on the reduce() builtin, which exists
+    # only on Python 2.
+    return os.path.sep.join(pathElements)
+
+def buildAbsPath(pathElements):
+    """Return the path built from pathElements, prefixed with os.path.sep."""
+    relativePart = buildPath(pathElements)
+    return os.path.sep + relativePart
+
+def findHive():
+    """Export HIVE_HOME, HIVE_LIB_DIR and HIVE_CONF_DIR where they can be found.
+
+    Nothing is reported to the caller: each variable is simply left unset
+    when its directory is missing, and the API user decides how to react.
+    HIVE_CONF_DIR is only looked for once HIVE_LIB_DIR has been set.
+    """
+    # TODO, check for Hive in path. For now, just look in known locations and
+    # HIVE_HOME.
+    # /usr/bin/hive is an RPM specific path, so there is no need for an OS
+    # independent check here.  A HIVE_HOME that is already set overrides the
+    # default location.
+    if 'HIVE_HOME' not in os.environ and os.path.exists("/usr/bin/hive"):
+        os.environ['HIVE_HOME'] = os.path.sep.join(["", "usr", "lib", "hive"])
+
+    hiveHome = os.environ.get('HIVE_HOME')
+    if hiveHome is None:
+        # the api user determines how to handle the non-existence of HIVE_HOME
+        return
+
+    libDir = os.path.join(hiveHome, 'lib')
+    if not os.path.exists(libDir):
+        return
+    os.environ['HIVE_LIB_DIR'] = libDir
+
+    confDir = os.path.join(hiveHome, 'conf')
+    if os.path.exists(confDir):
+        os.environ['HIVE_CONF_DIR'] = confDir
+
+def findHCatPrefix(binDir):
+    """Export HCAT_PREFIX as the parent of binDir and return it.
+
+    The value is "<binDir>/../" (not normalised, always ending in
+    os.path.sep).  binDir may be given with or without a trailing separator.
+    """
+    # os.path.join only inserts a separator when binDir does not already end
+    # with one, so callers that pass "dir/" get the same string as before and
+    # callers that pass "dir" no longer end up with "dir..".
+    os.environ['HCAT_PREFIX'] = os.path.join(binDir, '..') + os.path.sep
+    return os.environ['HCAT_PREFIX']
+
+def getHCatLogDir(binDir):
+    """Return the HCatalog log directory, <binDir>/../var/log (not normalised)."""
+    installRoot = os.path.join(binDir, '..')
+    return os.path.join(installRoot, 'var', 'log')