You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by kh...@apache.org on 2013/08/15 00:29:18 UTC

svn commit: r1514073 - in /hive/trunk/hcatalog/bin: hcat.py hcat_server.py hcatcfg.py

Author: khorgath
Date: Wed Aug 14 22:29:18 2013
New Revision: 1514073

URL: http://svn.apache.org/r1514073
Log:
HIVE-5013 : [HCatalog] Create hcat.py, hcat_server.py to make HCatalog work on Windows (Daniel Dai via Sushanth Sowmyan)

Added:
    hive/trunk/hcatalog/bin/hcat.py
    hive/trunk/hcatalog/bin/hcat_server.py
    hive/trunk/hcatalog/bin/hcatcfg.py

Added: hive/trunk/hcatalog/bin/hcat.py
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/bin/hcat.py?rev=1514073&view=auto
==============================================================================
--- hive/trunk/hcatalog/bin/hcat.py (added)
+++ hive/trunk/hcatalog/bin/hcat.py Wed Aug 14 22:29:18 2013
@@ -0,0 +1,154 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Command-line front end for HCatalog: assembles HADOOP_CLASSPATH and
+# HADOOP_OPTS, then runs org.apache.hcatalog.cli.HCatCli through "hadoop jar"
+
+import os
+import sys
+import glob
+import subprocess
+
+# Locate this script on disk.  realpath() resolves symbolic links, and bindir
+# keeps a trailing separator because hcatcfg.findHCatPrefix() appends to it
+this = os.path.realpath(sys.argv[0])
+bindir = os.path.dirname(this) + os.path.sep
+
+# Make ../libexec (relative to this script) importable so that the hcatcfg
+# module can be loaded from there
+sys.path.append(os.path.join(bindir, os.path.pardir, "libexec"))
+import hcatcfg
+
+# Find our config directory and Hadoop
+hcatcfg.findCfgFile()
+hcatcfg.findHadoop()
+
+# Private switches: each one is stripped from argv (first occurrence only),
+# so it is not among the arguments forwarded to HCatCli, and is recorded as
+# a 0/1 flag
+debug = 0
+if '-secretDebugCmd' in sys.argv:
+  sys.argv.remove('-secretDebugCmd')
+  debug = 1
+
+dumpClasspath = 0
+if '-classpath' in sys.argv:
+  sys.argv.remove('-classpath')
+  dumpClasspath = 1
+
+# find HIVE installation directory; a HIVE_HOME that is still unset after
+# the lookup means Hive could not be located
+hcatcfg.findHive()
+if 'HIVE_HOME' not in os.environ:
+  sys.exit("Hive not found.  Set HIVE_HOME to directory containing Hive.")
+
+# HIVE_LIB_DIR and HIVE_CONF_DIR are only exported by findHive() when the
+# matching sub-directory of HIVE_HOME exists
+for envVar, subDir in (('HIVE_LIB_DIR', "lib"), ('HIVE_CONF_DIR', "conf")):
+  if envVar not in os.environ:
+    sys.exit("Cannot find %s dir within HIVE_HOME %s" % (subDir, os.environ['HIVE_HOME'] + os.path.sep + subDir))
+
+##### jars addition
+# find the hcatalog jar and add it to hadoop classpath
+hcatPrefix = hcatcfg.findHCatPrefix(bindir)
+
+# exactly one hcatalog-core jar is expected: zero or several matches are
+# treated as a broken installation
+hcatJarPattern = os.path.join(hcatPrefix, 'share', 'hcatalog', 'hcatalog-core-*.jar')
+hcatJars = glob.glob(hcatJarPattern)
+
+if len(hcatJars) > 1:
+  sys.exit("Found more than one hcatalog jar in the prefix path")
+
+if len(hcatJars) < 1:
+  sys.exit("HCatalog jar not found in directory %s" % (hcatJarPattern))
+
+# Collect the classpath entries in order and join them once at the end.  An
+# unset HADOOP_CLASSPATH counts as '', so the joined value starts with a
+# separator exactly as repeated "+= os.pathsep + entry" would produce.
+classpathEntries = [os.environ.get('HADOOP_CLASSPATH', ''), hcatJars[0]]
+# done adding the hcatalog jar to the hadoop classpath
+
+# add all the other jars ('*' is passed through literally, not globbed here)
+classpathEntries.append(os.path.join(hcatPrefix, 'share', 'hcatalog', 'lib', '*'))
+
+# adding hive jars
+classpathEntries.append(os.path.join(os.environ['HIVE_LIB_DIR'], '*'))
+
+##### done with addition of jars
+
+
+##### add conf dirs to the classpath
+
+# add the hive conf dir and if exists hbase conf dir
+classpathEntries.append(os.environ['HIVE_CONF_DIR'])
+
+# if the hbase conf dir is present in the environment, add it.
+# there are no checks to see if that path exists
+# FIXME add check - original shell script does not do much if the path
+# does not exist either
+if os.environ.get('HBASE_CONF_DIR', '') != "":
+  classpathEntries.append(os.environ['HBASE_CONF_DIR'])
+
+# export the assembled classpath; the hadoop launcher started below inherits
+# it through the environment
+os.environ['HADOOP_CLASSPATH'] = os.pathsep.join(classpathEntries)
+
+##### done with adding conf dirs to the classpath
+
+
+sys.stdout.flush()
+
+# the launcher script is named "hadoop" on POSIX systems and "hadoop.cmd"
+# everywhere else
+onPosix = (os.name == "posix")
+hadoopcmd = "hadoop" if onPosix else "hadoop.cmd"
+
+# log under the Hive log dir but use a separate log file for HCat logs
+os.environ['HADOOP_OPTS'] = os.environ.get('HADOOP_OPTS', '') + " -Dhive.log.file=hcat.log -Dhive.log.dir=" + os.path.join(os.environ['HIVE_HOME'], "logs")
+
+##### Uncomment to debug log4j configuration
+#os.environ['HADOOP_OPTS'] += " -Dlog4j.debug"
+
+# HADOOP_PREFIX is set or validated by hcatcfg.findHadoop(); everything after
+# the script name on our own command line is forwarded to HCatCli untouched
+cmdLine = os.path.join(os.environ['HADOOP_PREFIX'], "bin", hadoopcmd)
+cmd = [cmdLine, "jar", hcatJars[0], "org.apache.hcatalog.cli.HCatCli"] + sys.argv[1:]
+if not onPosix:
+  # the .cmd launcher is invoked through the command shell's "call"
+  cmd.insert(0, "call")
+
+
+if debug == 1:
+  # dry run: show what would be executed and with which environment
+  print("Would run:")
+  print("exec " + str(cmd))
+  print(" with HADOOP_CLASSPATH set to %s" % (os.environ['HADOOP_CLASSPATH']))
+  print(" and HADOOP_OPTS set to %s" % (os.environ['HADOOP_OPTS']))
+elif dumpClasspath == 1:
+  # only report the classpath that would have been used
+  print(os.environ['HADOOP_CLASSPATH'])
+else:
+  if onPosix:
+    retval = subprocess.call(cmd)
+  else:
+    # shell=True so that the command shell interprets the leading "call"
+    retval = subprocess.call(cmd, shell=True)
+  # Hand the launcher's exit status to our caller.  Writing it to an
+  # environment variable of this process would not reach the parent, so the
+  # process exit code is the only channel used.
+  sys.exit(retval)
+

Added: hive/trunk/hcatalog/bin/hcat_server.py
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/bin/hcat_server.py?rev=1514073&view=auto
==============================================================================
--- hive/trunk/hcatalog/bin/hcat_server.py (added)
+++ hive/trunk/hcatalog/bin/hcat_server.py Wed Aug 14 22:29:18 2013
@@ -0,0 +1,165 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import sys
+import os
+import subprocess
+import time
+import glob
+
+from time import strftime
+
+sleepTime = 3
+def print_usage():  # writes the command synopsis to stdout; works on Python 2 and 3
+  print("Usage: %s [--config confdir] COMMAND" % (sys.argv[0]))
+  print("  start Start HCatalog Server")
+  print("  stop Stop HCatalog Server")
+
+def _jps_metastore_pid():
+  """Return the first pid that jps lists for HiveMetaStore, or None (Windows only)."""
+  jps = subprocess.Popen('jps | find /I "HiveMetaStore"', shell=True, stdout=subprocess.PIPE)
+  words = jps.communicate()[0].split()
+  if jps.returncode != 0 or not words:
+    return None
+  return int(words[0])
+
+def _server_running(pidFile):
+  """Return True if pidFile names a live server, without disturbing that process."""
+  try:
+    with open(pidFile, 'r') as pidFileDesc:
+      pid = int(pidFileDesc.read().split()[-1])
+  except (IOError, IndexError, ValueError):
+    return False
+  if os.name != "posix":
+    # os.kill() on Windows terminates the target for any ordinary signal value
+    return _jps_metastore_pid() is not None
+  try:
+    os.kill(pid, 0)
+  except OSError:
+    return False
+  return True
+
+def start_hcat():
+  """Start the Hive metastore in the background and record its pid in hcat.pid.
+
+  Reads HCAT_LOG_DIR, HIVE_HOME, HCAT_PREFIX and optionally DBROOT; exits if a
+  server is already running, hive-site.xml is missing, or startup fails.
+  """
+  logDir = os.environ['HCAT_LOG_DIR']
+  pidFile = os.path.join(logDir, 'hcat.pid')
+  # must stay outside any try/except so that SystemExit is not swallowed
+  if _server_running(pidFile):
+    sys.exit("HCatalog server appears to be running. If you are sure it is not remove %s and re-run this script" % (pidFile))
+
+  os.environ['HIVE_SITE_XML'] = os.path.join(os.environ['HIVE_HOME'], 'conf', 'hive-site.xml')
+  if not os.path.exists(os.environ['HIVE_SITE_XML']):
+    sys.exit("Missing hive-site.xml, expected at %s" % (os.environ['HIVE_SITE_XML']))
+
+  # Find our Warehouse dir from the config file
+  #  WAREHOUSE_DIR=`sed -n '/<name>hive.metastore.warehouse.dir<\/name>/ {
+  #      n
+  #      s/.*<value>\(.*\)<\/value>.*/\1/p
+  #      }' $HIVE_SITE_XML`
+  #  HADOOP_OPTS="$HADOOP_OPTS -Dhive.metastore.warehouse.dir=$WAREHOUSE_DIR " 
+
+  # add the hive-site.xml dir, the db connectivity jars (be careful to not point
+  # DBROOT at something like /lib) and the HCatalog jars to AUX_CLASSPATH
+  hcatShare = os.path.join(os.environ['HCAT_PREFIX'], 'share', 'hcatalog')
+  auxEntries = [os.environ.get('AUX_CLASSPATH', ''), os.path.dirname(os.environ['HIVE_SITE_XML'])]
+  if 'DBROOT' in os.environ:
+    auxEntries += glob.glob(os.path.join(os.environ['DBROOT'], '*.jar'))
+  auxEntries += glob.glob(os.path.join(hcatShare, 'lib', '*.jar'))
+  auxEntries += glob.glob(os.path.join(hcatShare, '*.jar'))
+  os.environ['AUX_CLASSPATH'] = os.pathsep.join(auxEntries)
+
+  gcLog = os.path.join(logDir, 'hcat_gc.log-') + strftime("%Y%m%d%H%M")
+  os.environ['HADOOP_OPTS'] = os.environ.get('HADOOP_OPTS', '') + " -server -XX:+UseConcMarkSweepGC -XX:ErrorFile=" + os.path.join(logDir, 'hcat_err_pid%p.log') + " -Xloggc:" + gcLog + " -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps"
+  os.environ['HADOOP_HEAPSIZE'] = '2048' # 8G is better if you have it
+
+  hivecmd = "hive" if os.name == "posix" else "hive.cmd"
+  command = os.path.join(os.environ['HIVE_HOME'], "bin", hivecmd)
+  errFile = os.path.join(logDir, "hcat.err")
+  outfd = open(os.path.join(logDir, "hcat.out"), 'w')
+  errfd = open(errFile, 'w')
+  child = subprocess.Popen([command, "--service", "metastore"], stdout=outfd, stderr=errfd)
+  # the child holds its own copies of these descriptors, so ours can be closed
+  outfd.close()
+  errfd.close()
+  print("Started metastore server init, testing if initialized correctly...")
+  time.sleep(sleepTime)
+  try:
+    if os.name == "posix":
+      # poll() notices an exited child; kill(pid, 0) still succeeds on a zombie
+      if child.poll() is not None:
+        raise Exception("metastore exited with status %s" % (child.returncode))
+      pid = child.pid
+    else:
+      pid = _jps_metastore_pid()
+      if pid is None:
+        raise Exception("error starting process")
+    with open(pidFile, 'w') as pidFileDesc:
+      pidFileDesc.write(str(pid))
+    print("Metastore initialized successfully")
+  except Exception as inst:
+    print(inst)
+    sys.exit("Metastore startup failed, see %s" % (errFile))
+
+def stop_hcat():
+  """Stop the metastore whose pid is recorded in $HCAT_LOG_DIR/hcat.pid.
+
+  Signal 6 is tried first and signal 9 only if that is rejected.  Exits with
+  an error when no usable pid is recorded or when both signals fail.
+  """
+  pidFile = os.path.join(os.environ['HCAT_LOG_DIR'], 'hcat.pid')
+
+  try:
+    with open(pidFile, 'r') as pidFileDesc:
+      pid = int(pidFileDesc.read().split()[0])
+    if pid <= 0:
+      raise ValueError("invalid pid %d" % (pid))
+  except (IOError, IndexError, ValueError):
+    # Never signal pid 0 as a fallback: on POSIX kill(0, 9) goes to every
+    # process in the caller's process group, not to the metastore.
+    sys.exit("Failed to stop metastore server")
+
+  try:
+    os.kill(pid, 6)
+  except OSError:
+    try:
+      os.kill(pid, 9)
+    except OSError:
+      sys.exit("Failed to stop metastore server")
+  return
+
+if __name__ == "__main__":
+  # bindir keeps its trailing separator: hcatcfg.findHCatPrefix() appends to it
+  this = os.path.realpath(sys.argv[0])
+  bindir = os.path.dirname(this) + os.path.sep
+  import hcatcfg
+  hcatcfg.findHCatPrefix(bindir)
+  os.environ['HCAT_LOG_DIR'] = hcatcfg.getHCatLogDir(bindir)
+
+  if len(sys.argv) == 1:
+    print_usage()
+    sys.exit()
+  elif sys.argv[1] == 'start':
+    start_hcat()
+  elif sys.argv[1] == 'stop':
+    stop_hcat()
+  else:
+    # reject unknown commands instead of treating them as 'stop'
+    print_usage()
+    sys.exit(1)

Added: hive/trunk/hcatalog/bin/hcatcfg.py
URL: http://svn.apache.org/viewvc/hive/trunk/hcatalog/bin/hcatcfg.py?rev=1514073&view=auto
==============================================================================
--- hive/trunk/hcatalog/bin/hcatcfg.py (added)
+++ hive/trunk/hcatalog/bin/hcatcfg.py Wed Aug 14 22:29:18 2013
@@ -0,0 +1,86 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Helpers shared by hcat.py and hcat_server.py: they locate the HCatalog,
+# Hadoop and Hive installations and export the results through os.environ
+
+import os.path
+import sys
+
+def findCfgFile():
+    """Default HCAT_CONF_DIR to HCAT_PREFIX/etc/hcatalog if that exists, else to buildAbsPath's etc/hcatalog."""
+    relConfDir = buildPath(["etc", "hcatalog"])
+    candidate = None
+    if 'HCAT_PREFIX' in os.environ:
+        candidate = os.environ['HCAT_PREFIX'] + os.path.sep + relConfDir
+    if candidate is None or not os.path.exists(candidate):
+        candidate = buildAbsPath(["etc", "hcatalog"])
+    # an HCAT_CONF_DIR that is already exported wins over the computed default
+    if 'HCAT_CONF_DIR' not in os.environ:
+        os.environ['HCAT_CONF_DIR'] = candidate
+
+def findHadoop():
+    """Point HADOOP_PREFIX at HADOOP_HOME, else HCAT_PREFIX, if it holds bin/hadoop; exit if nothing does."""
+    launcher = buildPath(["bin", "hadoop"])
+    def hasLauncher(envVar):
+        return envVar in os.environ and os.path.exists(os.environ[envVar] + os.path.sep + launcher)
+    if hasLauncher('HADOOP_HOME'):
+        os.environ['HADOOP_PREFIX'] = os.environ['HADOOP_HOME']
+    elif hasLauncher('HCAT_PREFIX'):
+        os.environ['HADOOP_PREFIX'] = os.environ['HCAT_PREFIX']
+    elif not hasLauncher('HADOOP_PREFIX'):
+        sys.exit("Hadoop not found.  Set HADOOP_HOME to the directory containing Hadoop.")
+
+def concatPath(x, y):  # x and y joined by os.path.sep; no normalization is done
+    return os.path.sep.join((x, y))
+
+def buildPath(pathElements):  # ["etc", "hcatalog"] -> "etc" + os.path.sep + "hcatalog"; [] -> ""
+    return os.path.sep.join(pathElements)
+
+def buildAbsPath(pathElements):  # same as buildPath(), prefixed with os.path.sep
+    return "%s%s" % (os.path.sep, buildPath(pathElements))
+
+def findHive():
+    """Export HIVE_HOME, HIVE_LIB_DIR and HIVE_CONF_DIR when they can be found.
+
+    An existing HIVE_HOME always wins; otherwise buildAbsPath(["usr", "lib",
+    "hive"]) is assumed when /usr/bin/hive exists.  Failures are silent: the
+    caller decides how to treat variables that are still unset afterwards.
+    """
+    # TODO, check for Hive in path.  For now, just look in known locations
+    # and HIVE_HOME.  /usr/bin/hive is an RPM specific path, so checking for
+    # it does not need to be OS independent.
+    if 'HIVE_HOME' not in os.environ and os.path.exists("/usr/bin/hive"):
+        os.environ['HIVE_HOME'] = buildAbsPath(["usr", "lib", "hive"])
+
+    hiveHome = os.environ.get('HIVE_HOME')
+    if hiveHome is None:
+        return
+
+    # lib is probed before conf, and the first missing directory ends the
+    # lookup, so a missing lib dir also leaves HIVE_CONF_DIR untouched
+    for envVar, subDir in (('HIVE_LIB_DIR', 'lib'), ('HIVE_CONF_DIR', 'conf')):
+        candidate = os.path.join(hiveHome, subDir)
+        if not os.path.exists(candidate):
+            return
+        os.environ[envVar] = candidate
+
+def findHCatPrefix(binDir):  # binDir is used as-is, so it should end with os.path.sep
+    prefix = os.environ['HCAT_PREFIX'] = "%s..%s" % (binDir, os.path.sep)
+    return prefix
+
+def getHCatLogDir(binDir):  # <binDir>/../var/log; the path is only computed, not created
+    return os.path.join(binDir, os.path.pardir, 'var', 'log')