You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by nz...@apache.org on 2011/10/31 01:25:23 UTC

svn commit: r1195288 - in /hive/trunk: bin/ext/debug.sh bin/ext/help.sh bin/hive conf/hive-env.sh.template ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java

Author: nzhang
Date: Mon Oct 31 00:25:23 2011
New Revision: 1195288

URL: http://svn.apache.org/viewvc?rev=1195288&view=rev
Log:
HIVE-2500. Allow Hive to be debugged remotely (Robert Surowka via Ning Zhang)

Added:
    hive/trunk/bin/ext/debug.sh
Modified:
    hive/trunk/bin/ext/help.sh
    hive/trunk/bin/hive
    hive/trunk/conf/hive-env.sh.template
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java

Added: hive/trunk/bin/ext/debug.sh
URL: http://svn.apache.org/viewvc/hive/trunk/bin/ext/debug.sh?rev=1195288&view=auto
==============================================================================
--- hive/trunk/bin/ext/debug.sh (added)
+++ hive/trunk/bin/ext/debug.sh Mon Oct 31 00:25:23 2011
@@ -0,0 +1,93 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+does_jvm_support_ti(){
+  version=$( java -version 2>&1 | awk -F '"' '/version/ {print $2}')
+  if [[ "$version" < "1.5" ]]; then
+      return 1
+  else
+      return 0
+  fi
+}
+
+# Applies one "key=value" debug parameter; unknown keys are silently ignored.
+# Arguments: $1 - a single parameter, e.g. "port=8000" or "mainSuspend=n".
+# Globals written: HIVE_DEBUG_RECURSIVE (exported), port, main_suspend, child_suspend.
+set_debug_param(){
+  local key value
+  # IFS is set for this read only: no global IFS juggling, no globbing of $1.
+  IFS='=' read -r key value _ <<< "$1"
+  case "$key" in
+      recursive)
+      export HIVE_DEBUG_RECURSIVE="$value"
+      ;;
+      port)
+      port="address=$value"
+      ;;
+      mainSuspend)
+      main_suspend="suspend=$value"
+      ;;
+      childSuspend)
+      child_suspend="suspend=$value"
+      ;;
+  esac
+}
+
+# Splits the "--debug[:k=v,k=v,...]" option and hands every k=v to set_debug_param.
+# Arguments: $1 - the complete --debug argument; text after a second ':' is ignored.
+parse_debug(){
+  local list param params
+  # IFS is scoped to each read, so the caller's IFS is neither changed nor unset.
+  IFS=':' read -r _ list _ <<< "$1"
+  IFS=',' read -ra params <<< "$list"
+  for param in "${params[@]}"; do set_debug_param "$param"; done
+}
+
+set_debug_defaults(){
+  export HIVE_DEBUG_RECURSIVE="y"
+  port="address=8000"
+  main_suspend="suspend=y"
+  child_suspend="suspend=n"
+}
+
+# Builds the JDWP options for the main and child JVMs from the --debug argument.
+# Arguments: $1 - the "--debug[:params]" option as given on the command line.
+# Exports:   HIVE_MAIN_CLIENT_DEBUG_OPTS and HIVE_CHILD_CLIENT_DEBUG_OPTS.
+get_debug_params(){
+  set_debug_defaults
+  parse_debug "$1"   # quoted: the option must reach parse_debug as one unglobbed word
+  # For Debug -XX:+UseParallelGC is needed, as it is a (unfortunately not perfect)
+  # workaround for JVM 6862295 bug, that affects some JVMs still in use
+  local agent=" -XX:+UseParallelGC -Xdebug -Xrunjdwp:transport=dt_socket,server=y"
+  if does_jvm_support_ti; then agent=" -XX:+UseParallelGC -agentlib:jdwp=transport=dt_socket,server=y"; fi
+  # Only the main JVM gets the address= setting; the child options carry none.
+  export HIVE_MAIN_CLIENT_DEBUG_OPTS="$agent,$port,$main_suspend"
+  export HIVE_CHILD_CLIENT_DEBUG_OPTS="$agent,$child_suspend"
+}
+
+# Prints the usage text of the --debug option to stdout.
+# One printf with a '%s\n' format emits each argument on its own line; the
+# empty arguments produce the blank separator lines.
+debug_help(){
+  printf '%s\n' "" \
+    "Allows to debug Hive by connecting to it via JDI API" "" \
+    "Usage: hive --debug[:comma-separated parameters list]" "" \
+    "Parameters:" "" \
+    "recursive=<y|n>             Should child JVMs also be started in debug mode. Default: y" \
+    "port=<port_number>          Port on which main JVM listens for debug connection. Default: 8000" \
+    "mainSuspend=<y|n>           Should main JVM wait with execution for the debugger to connect. Default: y" \
+    "childSuspend=<y|n>          Should child JVMs wait with execution for the debugger to connect. Default: n" \
+    ""
+}

Modified: hive/trunk/bin/ext/help.sh
URL: http://svn.apache.org/viewvc/hive/trunk/bin/ext/help.sh?rev=1195288&r1=1195287&r2=1195288&view=diff
==============================================================================
--- hive/trunk/bin/ext/help.sh (original)
+++ hive/trunk/bin/ext/help.sh Mon Oct 31 00:25:23 2011
@@ -28,6 +28,7 @@ help() {
   echo "  HIVE_OPT : Hive options"
   echo "For help on a particular service:"
   echo "  ./hive --service serviceName --help"
+  echo "Debug help:  ./hive --debug --help"
 }
 
 help_help(){

Modified: hive/trunk/bin/hive
URL: http://svn.apache.org/viewvc/hive/trunk/bin/hive?rev=1195288&r1=1195287&r2=1195288&view=diff
==============================================================================
--- hive/trunk/bin/hive (original)
+++ hive/trunk/bin/hive Mon Oct 31 00:25:23 2011
@@ -42,6 +42,10 @@ while [ $# -gt 0 ]; do
       HELP=_help
       shift
       ;;
+    --debug*)
+      DEBUG=$1
+      shift
+      ;;
     *)
       break
       ;;
@@ -197,6 +201,16 @@ for i in "$bin"/ext/util/*.sh ; do
   . $i
 done
 
+if [ "$DEBUG" ]; then
+  if [ "$HELP" ]; then
+    debug_help
+    exit 0
+  else
+    get_debug_params "$DEBUG"
+    export HADOOP_CLIENT_OPTS="$HADOOP_CLIENT_OPTS $HIVE_MAIN_CLIENT_DEBUG_OPTS"
+  fi
+fi
+
 TORUN=""
 for j in $SERVICE_LIST ; do
   if [ "$j" = "$SERVICE" ] ; then

Modified: hive/trunk/conf/hive-env.sh.template
URL: http://svn.apache.org/viewvc/hive/trunk/conf/hive-env.sh.template?rev=1195288&r1=1195287&r2=1195288&view=diff
==============================================================================
--- hive/trunk/conf/hive-env.sh.template (original)
+++ hive/trunk/conf/hive-env.sh.template Mon Oct 31 00:25:23 2011
@@ -12,7 +12,11 @@
 # reducing memory usage:
 #
 # if [ "$SERVICE" = "cli" ]; then
-#   export HADOOP_OPTS="$HADOOP_OPTS -XX:NewRatio=12 -Xms10m -XX:MaxHeapFreeRatio=40 -XX:MinHeapFreeRatio=15 -XX:+UseParNewGC -XX:-UseGCOverheadLimit"
+#   if [ -z "$DEBUG" ]; then
+#     export HADOOP_OPTS="$HADOOP_OPTS -XX:NewRatio=12 -Xms10m -XX:MaxHeapFreeRatio=40 -XX:MinHeapFreeRatio=15 -XX:+UseParNewGC -XX:-UseGCOverheadLimit"
+#   else
+#     export HADOOP_OPTS="$HADOOP_OPTS -XX:NewRatio=12 -Xms10m -XX:MaxHeapFreeRatio=40 -XX:MinHeapFreeRatio=15 -XX:-UseGCOverheadLimit"
+#   fi
 # fi
 
 # The heap size of the jvm stared by hive shell script can be controlled via:

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java?rev=1195288&r1=1195287&r2=1195288&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java Mon Oct 31 00:25:23 2011
@@ -55,6 +55,10 @@ public class MapRedTask extends ExecDriv
 
   static final String HADOOP_MEM_KEY = "HADOOP_HEAPSIZE";
   static final String HADOOP_OPTS_KEY = "HADOOP_OPTS";
+  static final String HADOOP_CLIENT_OPTS = "HADOOP_CLIENT_OPTS";
+  static final String HIVE_DEBUG_RECURSIVE = "HIVE_DEBUG_RECURSIVE";
+  static final String HIVE_MAIN_CLIENT_DEBUG_OPTS = "HIVE_MAIN_CLIENT_DEBUG_OPTS";
+  static final String HIVE_CHILD_CLIENT_DEBUG_OPTS = "HIVE_CHILD_CLIENT_DEBUG_OPTS";
   static final String[] HIVE_SYS_PROP = {"build.dir", "build.dir.hive"};
 
   private transient ContentSummary inputSummary = null;
@@ -247,6 +251,11 @@ public class MapRedTask extends ExecDriv
       } else {
         variables.put(HADOOP_OPTS_KEY, hadoopOpts);
       }
+
+      if(variables.containsKey(HIVE_DEBUG_RECURSIVE)) {
+        configureDebugVariablesForChildJVM(variables);
+      }
+
       env = new String[variables.size()];
       int pos = 0;
       for (Map.Entry<String, String> entry : variables.entrySet()) {
@@ -294,6 +303,48 @@ public class MapRedTask extends ExecDriv
     }
   }
 
+  /**
+   * Prepares HADOOP_CLIENT_OPTS for a child JVM. When HIVE_DEBUG_RECURSIVE is "y" the main JVM's
+   * debug options are replaced by the child ones; otherwise they are stripped and the variable is
+   * dropped if nothing else is left. HIVE_DEBUG_RECURSIVE itself is always removed.
+   * @param environmentVariables mutable environment of the child process, modified in place
+   */
+  private void configureDebugVariablesForChildJVM(Map<String, String> environmentVariables) {
+    String hadoopClientOpts = environmentVariables.get(HADOOP_CLIENT_OPTS);
+    String mainDebugOpts = environmentVariables.get(HIVE_MAIN_CLIENT_DEBUG_OPTS);
+    String childDebugOpts = environmentVariables.get(HIVE_CHILD_CLIENT_DEBUG_OPTS);
+    String recursive = environmentVariables.get(HIVE_DEBUG_RECURSIVE);
+    boolean swapToChildOpts = "y".equals(recursive);
+    // these asserts warn (when assertions are enabled) if environment variables are in a buggy state
+    assert hadoopClientOpts != null : HADOOP_CLIENT_OPTS
+        + " environment variable must be set when JVM in debug mode";
+    assert mainDebugOpts != null : HIVE_MAIN_CLIENT_DEBUG_OPTS
+        + " environment variable must be set when JVM in debug mode";
+    assert hadoopClientOpts.contains(mainDebugOpts) : HADOOP_CLIENT_OPTS
+        + " environment variable must contain debugging parameters, when JVM in debugging mode";
+    assert swapToChildOpts || "n".equals(recursive) : HIVE_DEBUG_RECURSIVE
+        + " environment variable must be set to \"y\" or \"n\" when debugging";
+    assert !swapToChildOpts || childDebugOpts != null : HIVE_CHILD_CLIENT_DEBUG_OPTS
+        + " environment variable must be set when JVM in debug mode";
+    // child JVM won't need to change debug parameters when creating its own children
+    environmentVariables.remove(HIVE_DEBUG_RECURSIVE);
+    if (hadoopClientOpts == null || mainDebugOpts == null) {
+      return; // with assertions off, replace() below would otherwise throw a NullPointerException
+    }
+    if (swapToChildOpts && childDebugOpts != null) {
+      // swap debug options in HADOOP_CLIENT_OPTS to those that the child JVM should have
+      environmentVariables.put(HADOOP_CLIENT_OPTS, hadoopClientOpts.replace(mainDebugOpts, childDebugOpts));
+      return;
+    }
+    // remove from HADOOP_CLIENT_OPTS any debug related options
+    String strippedOpts = hadoopClientOpts.replace(mainDebugOpts, "").trim();
+    if (strippedOpts.isEmpty()) {
+      environmentVariables.remove(HADOOP_CLIENT_OPTS);
+    } else {
+      environmentVariables.put(HADOOP_CLIENT_OPTS, strippedOpts);
+    }
+  }
+
   @Override
   public boolean mapStarted() {
     boolean b = super.mapStarted();