Posted to commits@hive.apache.org by zs...@apache.org on 2010/02/26 00:42:37 UTC

svn commit: r916495 - in /hadoop/hive/trunk: ./ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/exec/errors/

Author: zshao
Date: Thu Feb 25 23:42:37 2010
New Revision: 916495

URL: http://svn.apache.org/viewvc?rev=916495&view=rev
Log:
HIVE-1032. Better Error Messages for Execution Errors. (Paul Yang via zshao)

Added:
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/DataCorruptErrorHeuristic.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ErrorAndSolution.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ErrorHeuristic.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/MapAggrMemErrorHeuristic.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/RegexErrorHeuristic.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ScriptErrorHeuristic.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/package-info.java
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=916495&r1=916494&r2=916495&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Thu Feb 25 23:42:37 2010
@@ -33,6 +33,9 @@
     HIVE-1188. NPE when TestJdbcDriver/TestHiveServer
     (Carl Steinbach via Ning Zhang)
 
+    HIVE-1032. Better Error Messages for Execution Errors.
+    (Paul Yang via zshao)
+
   IMPROVEMENTS
     HIVE-983. Function from_unixtime takes long.
     (Ning Zhang via zshao)

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java?rev=916495&r1=916494&r2=916495&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java Thu Feb 25 23:42:37 2010
@@ -52,6 +52,8 @@
 import org.apache.hadoop.hive.ql.DriverContext;
 import org.apache.hadoop.hive.ql.QueryPlan;
 import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
+import org.apache.hadoop.hive.ql.exec.errors.ErrorAndSolution;
+import org.apache.hadoop.hive.ql.exec.errors.TaskLogProcessor;
 import org.apache.hadoop.hive.ql.history.HiveHistory.Keys;
 import org.apache.hadoop.hive.ql.io.HiveKey;
 import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
@@ -70,11 +72,11 @@
 import org.apache.hadoop.mapred.InputFormat;
 import org.apache.hadoop.mapred.JobClient;
 import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Partitioner;
 import org.apache.hadoop.mapred.RunningJob;
 import org.apache.hadoop.mapred.TaskCompletionEvent;
 import org.apache.log4j.BasicConfigurator;
 import org.apache.log4j.varia.NullAppender;
-import org.apache.hadoop.mapred.Partitioner;
 
 /**
  * ExecDriver.
@@ -104,7 +106,7 @@
     SessionState ss = SessionState.get();
     Set<String> files = (ss == null) ? null : ss.list_resource(t, null);
     if (files != null) {
-      ArrayList<String> realFiles = new ArrayList<String>(files.size());
+      List<String> realFiles = new ArrayList<String>(files.size());
       for (String one : files) {
         try {
           realFiles.add(Utilities.realFile(one, conf));
@@ -259,6 +261,7 @@
       rj = job;
     }
 
+    @Override
     public Counters getCounters() throws IOException {
       return rj.getCounters();
     }
@@ -294,6 +297,7 @@
     }
   }
 
+  @Override
   public void progress(TaskHandle taskHandle) throws IOException {
     ExecDriverTaskHandle th = (ExecDriverTaskHandle) taskHandle;
     JobClient jc = th.getJobClient();
@@ -503,6 +507,7 @@
   /**
    * Execute a query plan using Hadoop.
    */
+  @Override
   public int execute() {
 
     success = true;
@@ -762,17 +767,47 @@
     return "Ended Job = " + jobId;
   }
 
-  private void showJobFailDebugInfo(JobConf conf, RunningJob rj) throws IOException {
+  private String getTaskAttemptLogUrl(String taskTrackerHttpAddress,
+      String taskAttemptId) {
+    return taskTrackerHttpAddress + "/tasklog?taskid=" + taskAttemptId + "&all=true";
+  }
+
+  // Used for showJobFailDebugInfo
+  private static class TaskInfo {
+    String jobId;
+    HashSet<String> logUrls;
+
+    public TaskInfo(String jobId) {
+      this.jobId = jobId;
+      logUrls = new HashSet<String>();
+    }
+    public void addLogUrl(String logUrl) {
+      logUrls.add(logUrl);
+    }
+    public HashSet<String> getLogUrls() {
+      return logUrls;
+    }
+    public String getJobId() {
+      return jobId;
+    }
+  }
 
+  @SuppressWarnings("deprecation")
+  private void showJobFailDebugInfo(JobConf conf, RunningJob rj) throws IOException {
+    // Mapping from task ID to the number of failures
     Map<String, Integer> failures = new HashMap<String, Integer>();
+    // Successful task ID's
     Set<String> successes = new HashSet<String>();
-    Map<String, String> taskToJob = new HashMap<String, String>();
+
+    Map<String, TaskInfo> taskIdToInfo = new HashMap<String, TaskInfo>();
 
     int startIndex = 0;
 
+    // Loop to get all task completion events because getTaskCompletionEvents
+    // only returns a subset per call
     while (true) {
-      TaskCompletionEvent[] taskCompletions = rj
-          .getTaskCompletionEvents(startIndex);
+      TaskCompletionEvent[] taskCompletions =
+        rj.getTaskCompletionEvents(startIndex);
 
       if (taskCompletions == null || taskCompletions.length == 0) {
         break;
@@ -780,21 +815,35 @@
 
       boolean more = true;
       for (TaskCompletionEvent t : taskCompletions) {
-        // getTaskJobIDs return Strings for compatibility with Hadoop version
-        // without
-        // TaskID or TaskAttemptID
+        // getTaskJobIDs returns Strings for compatibility with Hadoop versions
+        // without TaskID or TaskAttemptID
         String[] taskJobIds = ShimLoader.getHadoopShims().getTaskJobIDs(t);
 
         if (taskJobIds == null) {
-          console
-              .printError("Task attempt info is unavailable in this Hadoop version");
+          console.printError("Task attempt info is unavailable in " +
+                             "this Hadoop version");
           more = false;
           break;
         }
 
+        // For each task completion event, get the associated task id, job id
+        // and the logs
         String taskId = taskJobIds[0];
         String jobId = taskJobIds[1];
-        taskToJob.put(taskId, jobId);
+
+        TaskInfo ti = taskIdToInfo.get(taskId);
+        if(ti == null) {
+          ti = new TaskInfo(jobId);
+          taskIdToInfo.put(taskId, ti);
+        }
+        // These tasks should have come from the same job.
+        assert(ti.getJobId().equals(jobId));
+        ti.getLogUrls().add(
+            getTaskAttemptLogUrl(t.getTaskTrackerHttp(), t.getTaskId()));
+
+        // If a task failed, then keep track of the total number of failures
+        // for that task (typically, a task gets re-run up to 4 times if it
+        // fails).
 
         if (t.getTaskStatus() != TaskCompletionEvent.Status.SUCCEEDED) {
           Integer failAttempts = failures.get(taskId);
@@ -830,16 +879,42 @@
     }
 
     // Display Error Message for tasks with the highest failure count
-    console.printError("\nFailed tasks with most" + "(" + maxFailures + ")"
-        + " failures " + ": ");
     String jtUrl = JobTrackerURLResolver.getURL(conf);
 
     for (String task : failures.keySet()) {
       if (failures.get(task).intValue() == maxFailures) {
-        String jobId = taskToJob.get(task);
-        String taskUrl = jtUrl + "/taskdetails.jsp?jobid=" + jobId + "&tipid="
-            + task.toString();
-        console.printError("Task URL: " + taskUrl + "\n");
+        TaskInfo ti = taskIdToInfo.get(task);
+        String jobId = ti.getJobId();
+        String taskUrl = jtUrl + "/taskdetails.jsp?jobid=" + jobId +
+            "&tipid=" + task.toString();
+
+        TaskLogProcessor tlp = new TaskLogProcessor(conf);
+        for(String logUrl : ti.getLogUrls()) {
+          tlp.addTaskAttemptLogUrl(logUrl);
+        }
+
+        List<ErrorAndSolution> errors = tlp.getErrors();
+
+        StringBuilder sb = new StringBuilder();
+        // We use a StringBuilder and then call printError only once as
+        // printError will write to both stderr and the error log file. In
+        // situations where both the stderr and the log file output is
+        // simultaneously output to a single stream, this will look cleaner.
+        sb.append("\n");
+        sb.append("Task with the most failures(" + maxFailures + "): \n");
+        sb.append("-----\n");
+        sb.append("Task ID:\n  " + task + "\n\n");
+        sb.append("URL:\n  " + taskUrl + "\n");
+
+        for(ErrorAndSolution e : errors) {
+          sb.append("\n");
+          sb.append("Possible error:\n  " + e.getError() + "\n\n");
+          sb.append("Solution:\n  " + e.getSolution() + "\n");
+        }
+        sb.append("-----\n");
+
+        console.printError(sb.toString());
+
         // Only print out one task because that's good enough for debugging.
         break;
       }
@@ -1165,6 +1240,7 @@
     }
   }
 
+  @Override
   public int getType() {
     return StageType.MAPRED;
   }
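
For reference, the completion-event pagination that the new showJobFailDebugInfo code relies on can be exercised on its own. A minimal sketch, assuming "job" is a RunningJob handle for a submitted stage (the helper class and its name are illustrative, not part of this commit):

    import java.io.IOException;

    import org.apache.hadoop.mapred.RunningJob;
    import org.apache.hadoop.mapred.TaskCompletionEvent;

    public class CompletionEventScan {
      public static void scan(RunningJob job) throws IOException {
        int startIndex = 0;
        while (true) {
          // getTaskCompletionEvents only returns a window of events per
          // call, so keep polling from the last offset until none remain.
          TaskCompletionEvent[] events = job.getTaskCompletionEvents(startIndex);
          if (events == null || events.length == 0) {
            break;
          }
          for (TaskCompletionEvent t : events) {
            System.out.println(t.getTaskId() + " -> " + t.getTaskStatus());
          }
          startIndex += events.length;
        }
      }
    }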

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/DataCorruptErrorHeuristic.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/DataCorruptErrorHeuristic.java?rev=916495&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/DataCorruptErrorHeuristic.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/DataCorruptErrorHeuristic.java Thu Feb 25 23:42:37 2010
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.errors;
+
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Detects the condition where there is an error with one of the input files in
+ * the query.
+ *
+ * Conditions to check:
+ * 1. EOFException in log
+ * 2. A line indicating the split file in the log. This is needed to
+ *    generate the proper error message.
+ *
+ */
+
+public class DataCorruptErrorHeuristic extends RegexErrorHeuristic {
+
+  private static final String SPLIT_REGEX = "split:.*";
+  private static final String EXCEPTION_REGEX = "EOFException";
+
+  public DataCorruptErrorHeuristic() {
+    setQueryRegex(".*");
+    getLogRegexes().add(SPLIT_REGEX);
+    getLogRegexes().add(EXCEPTION_REGEX);
+  }
+
+  @Override
+  public ErrorAndSolution getErrorAndSolution() {
+    ErrorAndSolution es = null;
+
+    if(getQueryMatches()) {
+      Map<String, List<String>> rll = getRegexToLogLines();
+      if (rll.get(EXCEPTION_REGEX).size() > 0 &&
+          rll.get(SPLIT_REGEX).size() > 0) {
+
+        // There should only be a single split line...
+        assert(rll.get(SPLIT_REGEX).size()==1);
+        String splitLogLine = rll.get(SPLIT_REGEX).get(0);
+
+        // Extract only 'split: hdfs://...'
+        Pattern p = Pattern.compile(SPLIT_REGEX, Pattern.CASE_INSENSITIVE);
+        Matcher m = p.matcher(splitLogLine);
+        m.find();
+        String splitStr = m.group();
+
+        es = new ErrorAndSolution(
+            "Data file " + splitStr + " is corrupted.",
+            "Replace file. i.e. by re-running the query that produced the " +
+            "source table / partition.");
+      }
+    }
+    reset();
+    return es;
+  }
+}
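
The split extraction above can be checked in isolation. A small sketch with a made-up log line (the class name and sample line are illustrative only):

    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    public class SplitExtractDemo {
      public static void main(String[] args) {
        // Hypothetical task log line of the kind the heuristic matches.
        String logLine =
            "INFO MapTask: split: hdfs://nn:8020/warehouse/t/part-00000:0+67108864";
        Pattern p = Pattern.compile("split:.*", Pattern.CASE_INSENSITIVE);
        Matcher m = p.matcher(logLine);
        if (m.find()) {
          // Prints everything from "split:" to the end of the line.
          System.out.println(m.group());
        }
      }
    }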

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ErrorAndSolution.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ErrorAndSolution.java?rev=916495&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ErrorAndSolution.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ErrorAndSolution.java Thu Feb 25 23:42:37 2010
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.errors;
+
+/**
+ * Immutable class for storing a possible error and a resolution suggestion.
+ */
+public class ErrorAndSolution {
+
+  private String error = null;
+  private String solution = null;
+
+  ErrorAndSolution(String error, String solution) {
+    this.error = error;
+    this.solution = solution;
+  }
+
+  public String getError() {
+    return error;
+  }
+
+  public String getSolution() {
+    return solution;
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (!(o instanceof ErrorAndSolution)) {
+      return false;
+    }
+    ErrorAndSolution e = (ErrorAndSolution)o;
+
+    return e.error.equals(this.error) && e.solution.equals(this.solution);
+  }
+
+  @Override
+  public int hashCode() {
+    return error.hashCode() * 37 + solution.hashCode();
+  }
+}

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ErrorHeuristic.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ErrorHeuristic.java?rev=916495&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ErrorHeuristic.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ErrorHeuristic.java Thu Feb 25 23:42:37 2010
@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.errors;
+
+import org.apache.hadoop.mapred.JobConf;
+
+/**
+ * Classes implementing ErrorHeuristic are able to generate a possible cause and
+ * solution for Hive jobs that have failed by examining the query, task log
+ * files, and the job configuration.
+ *
+ * A class implementing ErrorHeuristic should only detect one type of error.
+ *
+ */
+public interface ErrorHeuristic {
+
+  /**
+   * Initialize this error heuristic. Must be called before any other methods
+   * are called.
+   * @param query
+   * @param jobConf
+   */
+  void init(String query, JobConf jobConf);
+
+  /**
+   * Process the given log line. It should be called for every line in the task
+   * log file, in sequence.
+   *
+   * @param line
+   */
+  void processLogLine(String line);
+
+  /**
+   * Examine the hive query, job configuration, and the lines from the task log
+   * seen so far through processLogLine() and generate a possible cause/solution.
+   * Once this method is called, the implementing class should be reset to the
+   * state before any processLogLine() calls were made.
+   *
+   * @return a matching error, or null if a suitable match wasn't found.
+   *
+   */
+  ErrorAndSolution getErrorAndSolution();
+}
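
A minimal direct implementation illustrates the expected lifecycle: init() once, processLogLine() for every line, then getErrorAndSolution(), which also resets state for the next log. The heuristic below (a hypothetical disk-full check, not part of this commit) is only a sketch; the heuristics in this package extend RegexErrorHeuristic instead:

    package org.apache.hadoop.hive.ql.exec.errors;

    import org.apache.hadoop.mapred.JobConf;

    // Hypothetical heuristic: flags a full local disk.
    public class DiskFullErrorHeuristic implements ErrorHeuristic {
      private boolean sawDiskFull = false;

      @Override
      public void init(String query, JobConf jobConf) {
        // Nothing query- or conf-specific to record for this condition.
      }

      @Override
      public void processLogLine(String line) {
        if (line.contains("No space left on device")) {
          sawDiskFull = true;
        }
      }

      @Override
      public ErrorAndSolution getErrorAndSolution() {
        ErrorAndSolution es = sawDiskFull
            ? new ErrorAndSolution(
                "A task ran out of local disk space.",
                "Free up space on the tasktracker nodes or add capacity.")
            : null;
        sawDiskFull = false; // reset, per the interface contract
        return es;
      }
    }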

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/MapAggrMemErrorHeuristic.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/MapAggrMemErrorHeuristic.java?rev=916495&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/MapAggrMemErrorHeuristic.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/MapAggrMemErrorHeuristic.java Thu Feb 25 23:42:37 2010
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.errors;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.mapred.JobConf;
+
+/**
+ * Detects out-of-memory errors caused by the hash tables used for map-side
+ * aggregation in group-by queries taking up too much memory.
+ *
+ * Conditions to check
+ *
+ * 1. The query contains a group by.
+ * 2. Map-side aggregation is turned on.
+ * 3. There is an out-of-memory exception in the log.
+ */
+public class MapAggrMemErrorHeuristic extends RegexErrorHeuristic {
+
+  private static final String OUT_OF_MEMORY_REGEX = "OutOfMemoryError";
+  private boolean configMatches = false;
+
+  public MapAggrMemErrorHeuristic() {
+    setQueryRegex("group by");
+    getLogRegexes().add(OUT_OF_MEMORY_REGEX);
+  }
+
+  @Override
+  public void init(String query, JobConf conf) {
+    super.init(query, conf);
+    configMatches = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEMAPSIDEAGGREGATE);
+  }
+
+  @Override
+  public ErrorAndSolution getErrorAndSolution() {
+    ErrorAndSolution es = null;
+    if(getQueryMatches() && configMatches) {
+      List<String> matchingLines = getRegexToLogLines().get(OUT_OF_MEMORY_REGEX);
+
+      if (matchingLines.size() > 0) {
+        String confName = HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY.toString();
+        float confValue = HiveConf.getFloatVar(getConf(),
+            HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
+
+        es = new ErrorAndSolution(
+            "Out of memory due to hash maps used in map-side aggregation.",
+            "Currently " + confName + " is set to " + confValue + ". " +
+            "Try setting it to a lower value. i.e " +
+            "'set " + confName + " = " + confValue/2 + ";'");
+      }
+    }
+    reset();
+    return es;
+  }
+}
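
When this heuristic fires, the suggestion it emits halves the current hash memory fraction. Assuming HIVEMAPAGGRHASHMEMORY corresponds to hive.map.aggr.hash.percentmemory and the current value is 0.5, the rendered solution would read roughly:

    Currently hive.map.aggr.hash.percentmemory is set to 0.5. Try setting
    it to a lower value, e.g. 'set hive.map.aggr.hash.percentmemory = 0.25;'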

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/RegexErrorHeuristic.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/RegexErrorHeuristic.java?rev=916495&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/RegexErrorHeuristic.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/RegexErrorHeuristic.java Thu Feb 25 23:42:37 2010
@@ -0,0 +1,129 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.errors;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.Map.Entry;
+import java.util.regex.Pattern;
+
+import org.apache.hadoop.mapred.JobConf;
+
+/**
+ * Simple heuristic where the query and the lines of the task log file are run
+ * through regular expressions to see if they resemble a known error condition.
+ *
+ * Only a single regular expression can be supplied to match the query whereas
+ * multiple regular expressions can be supplied to match lines from the log file.
+ * A mapping is maintained from the regular expression to the lines from the log
+ * file that it matched.
+ */
+public abstract class RegexErrorHeuristic implements ErrorHeuristic {
+
+  private String query = null;
+  private JobConf conf = null;
+
+  // Pattern to look for in the hive query and whether it matched
+  private String queryRegex = null;
+  private boolean queryMatches = false;
+
+  // The regexes to look for in the log files
+  private final Set<String> logRegexes = new HashSet<String>();
+
+  // Mapping from the regex to lines in the log file where find() == true
+  private final Map<String, List<String>> regexToLogLines = new HashMap<String, List<String>>();
+  private final Map<String, Pattern> regexToPattern = new HashMap<String, Pattern>();
+
+  public RegexErrorHeuristic() {
+  }
+
+  protected void setQueryRegex(String queryRegex) {
+    this.queryRegex = queryRegex;
+  }
+
+  protected String getQueryRegex() {
+    return queryRegex;
+  }
+
+  protected boolean getQueryMatches() {
+    return queryMatches;
+  }
+
+  protected Set<String> getLogRegexes() {
+    return this.logRegexes;
+  }
+
+  protected Map<String, List<String>> getRegexToLogLines() {
+    return this.regexToLogLines;
+  }
+
+  protected JobConf getConf() {
+    return conf;
+  }
+
+  /**
+   * Before init() is called, logRegexes and queryRegex should be populated.
+   */
+  @Override
+  public void init(String query, JobConf conf) {
+    this.query = query;
+    this.conf = conf;
+
+    assert((logRegexes!=null) && (queryRegex != null));
+
+    Pattern queryPattern = Pattern.compile(queryRegex, Pattern.CASE_INSENSITIVE);
+    queryMatches = queryPattern.matcher(query).find();
+
+    for(String regex : logRegexes) {
+      regexToPattern.put(regex, Pattern.compile(regex, Pattern.CASE_INSENSITIVE));
+      regexToLogLines.put(regex, new ArrayList<String>());
+    }
+
+  }
+
+  @Override
+  public abstract ErrorAndSolution getErrorAndSolution();
+
+  @Override
+  public void processLogLine(String line) {
+    if(queryMatches) {
+      for(Entry<String, Pattern> e : regexToPattern.entrySet()) {
+        String regex = e.getKey();
+        Pattern p = e.getValue();
+        boolean lineMatches = p.matcher(line).find();
+        if(lineMatches) {
+          regexToLogLines.get(regex).add(line);
+        }
+      }
+    }
+  }
+
+  /**
+   * Resets to state before any processLogLine() calls.
+   */
+  protected void reset() {
+    for(List<String> lst : regexToLogLines.values()) {
+      lst.clear();
+    }
+  }
+}
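
A subclass follows the same shape as the heuristics elsewhere in this commit: set the query regex and log regexes in the constructor, then interpret the collected lines in getErrorAndSolution() and call reset(). A sketch of a hypothetical heuristic (not part of this commit) for missing classes:

    package org.apache.hadoop.hive.ql.exec.errors;

    import java.util.List;

    // Hypothetical subclass: detects ClassNotFoundException in task logs.
    public class ClassNotFoundErrorHeuristic extends RegexErrorHeuristic {

      private static final String CNF_REGEX = "ClassNotFoundException";

      public ClassNotFoundErrorHeuristic() {
        setQueryRegex(".*");            // applies to any query
        getLogRegexes().add(CNF_REGEX); // pattern to collect from the logs
      }

      @Override
      public ErrorAndSolution getErrorAndSolution() {
        ErrorAndSolution es = null;
        if (getQueryMatches()) {
          List<String> lines = getRegexToLogLines().get(CNF_REGEX);
          if (lines.size() > 0) {
            es = new ErrorAndSolution(
                "A required class was not found on the task classpath.",
                "Check that all needed auxiliary jars are registered, " +
                "e.g. with 'add jar'.");
          }
        }
        reset(); // clear collected log lines before the next log is processed
        return es;
      }
    }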

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ScriptErrorHeuristic.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ScriptErrorHeuristic.java?rev=916495&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ScriptErrorHeuristic.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ScriptErrorHeuristic.java Thu Feb 25 23:42:37 2010
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.errors;
+
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Detects when a query has failed because a user's script that was specified in
+ * transform returns a non-zero error code.
+ *
+ * Conditions to check:
+ *
+ * 1. "Script failed with code <some number>" is in the log
+ *
+ */
+
+public class ScriptErrorHeuristic extends RegexErrorHeuristic {
+
+  private static final String FAILED_REGEX = "Script failed with code [0-9]+";
+
+  public ScriptErrorHeuristic() {
+    setQueryRegex(".*");
+    getLogRegexes().add(FAILED_REGEX);
+  }
+
+  @Override
+  public ErrorAndSolution getErrorAndSolution() {
+    ErrorAndSolution es = null;
+
+    if(getQueryMatches()) {
+      for(List<String> matchingLines : getRegexToLogLines().values()) {
+        // There should really only be one line with "Script failed..."
+        if (matchingLines.size() > 0) {
+          assert(matchingLines.size() == 1);
+
+          // Get "Script failed with code <some number>"
+          Matcher m1 = Pattern.compile(FAILED_REGEX).matcher(matchingLines.get(0));
+          m1.find();
+          String failedStr = m1.group();
+
+          // Get "<some number>"
+          Matcher m2 = Pattern.compile("[0-9]+").matcher(failedStr);
+          m2.find();
+          String errorCode = m2.group();
+
+          es = new ErrorAndSolution(
+            "A user-supplied transfrom script has exited with error code " +
+            errorCode + " instead of 0.",
+            "Verify that the script can properly handle all the input rows " +
+            "without throwing exceptions and exits properly.");
+        }
+      }
+    }
+
+    reset();
+    return es;
+  }
+}

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java?rev=916495&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java Thu Feb 25 23:42:37 2010
@@ -0,0 +1,173 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.errors;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.mapred.JobConf;
+
+/**
+ * TaskLogProcessor reads the logs from failed task attempts and tries to figure
+ * out what the cause of the error was using various heuristics.
+ */
+public class TaskLogProcessor {
+
+  private final Map<ErrorHeuristic, HeuristicStats> heuristics =
+    new HashMap<ErrorHeuristic, HeuristicStats>();
+  private final List<String> taskLogUrls = new ArrayList<String>();
+
+  private JobConf conf = null;
+  // The Hive query string, e.g. "SELECT * FROM src;", associated with this
+  // set of task logs
+  private String query = null;
+
+  public TaskLogProcessor(JobConf conf) {
+    this.conf = conf;
+    query = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYSTRING);
+
+    heuristics.put(new ScriptErrorHeuristic(), new HeuristicStats());
+    heuristics.put(new MapAggrMemErrorHeuristic(), new HeuristicStats());
+    heuristics.put(new DataCorruptErrorHeuristic(), new HeuristicStats());
+    for(ErrorHeuristic e : heuristics.keySet()) {
+      e.init(query, conf);
+    }
+  }
+
+  /**
+   * Adds a task log URL for the heuristics to read through.
+   * @param url
+   */
+  public void addTaskAttemptLogUrl(String url) {
+    taskLogUrls.add(url);
+  }
+
+  private static class HeuristicStats {
+
+    // The number of times the heuristic has returned non-null errors
+    private int triggerCount = 0;
+    // All ErrorAndSolutions that the heuristic has generated. For the same
+    // error they should be identical, though different file paths etc. could
+    // produce different error messages.
+    private final List<ErrorAndSolution> ens = new ArrayList<ErrorAndSolution>();
+
+    HeuristicStats() {
+    }
+
+    int getTriggerCount() {
+      return triggerCount;
+    }
+
+    void incTriggerCount() {
+      triggerCount++;
+    }
+
+    List<ErrorAndSolution> getErrorAndSolutions() {
+      return ens;
+    }
+
+    void addErrorAndSolution(ErrorAndSolution e) {
+      ens.add(e);
+    }
+  }
+
+  /**
+   * Processes the provided task logs using the known error heuristics to get
+   * the matching errors.
+   * @return An ErrorAndSolution from the ErrorHeuristic that most frequently
+   * generated matches. In case of ties, multiple ErrorAndSolutions will be
+   * returned.
+   */
+  public List<ErrorAndSolution> getErrors() {
+
+    for(String urlString : taskLogUrls) {
+
+      // Open the log file, and read in a line. Then feed the line into
+      // each of the ErrorHeuristics. Repeat for all the lines in the log.
+      URL taskAttemptLogUrl;
+      try {
+        taskAttemptLogUrl = new URL(urlString);
+      } catch(MalformedURLException e) {
+        throw new RuntimeException("Bad task log url", e);
+      }
+      BufferedReader in;
+      try {
+        in = new BufferedReader(
+            new InputStreamReader(taskAttemptLogUrl.openStream()));
+        String inputLine;
+        while ((inputLine = in.readLine()) != null) {
+          for(ErrorHeuristic e : heuristics.keySet()) {
+            e.processLogLine(inputLine);
+          }
+        }
+        in.close();
+      } catch (IOException e) {
+        throw new RuntimeException("Error while reading from task log url", e);
+      }
+
+      // Once the lines of the log file have been fed into the ErrorHeuristics,
+      // see if they have detected anything. If any has, record
+      // what ErrorAndSolution it gave so we can later return the most
+      // frequently occurring error
+      for(Entry<ErrorHeuristic, HeuristicStats> ent : heuristics.entrySet()) {
+        ErrorHeuristic eh = ent.getKey();
+        HeuristicStats hs = ent.getValue();
+
+        ErrorAndSolution es = eh.getErrorAndSolution();
+        if(es != null) {
+          hs.incTriggerCount();
+          hs.addErrorAndSolution(es);
+        }
+      }
+
+    }
+
+    // Return the errors that occur the most frequently
+    int max = 0;
+    for(HeuristicStats hs : heuristics.values()) {
+      if(hs.getTriggerCount() > max) {
+        max = hs.getTriggerCount();
+      }
+    }
+
+    List<ErrorAndSolution> errors = new ArrayList<ErrorAndSolution>();
+    for(HeuristicStats hs : heuristics.values()) {
+      if(hs.getTriggerCount() == max) {
+        if(hs.getErrorAndSolutions().size() > 0) {
+          // An error heuristic could have generated different ErrorAndSolution
+          // for each task attempt, but most likely they are the same. Plus,
+          // one of those is probably good enough for debugging
+          errors.add(hs.getErrorAndSolutions().get(0));
+        }
+      }
+    }
+
+    return errors;
+  }
+
+}
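
Usage mirrors the ExecDriver change above: construct the processor, feed it the task-attempt log URLs, and print what comes back. A sketch (the wrapper class and method are illustrative only):

    import org.apache.hadoop.hive.ql.exec.errors.ErrorAndSolution;
    import org.apache.hadoop.hive.ql.exec.errors.TaskLogProcessor;
    import org.apache.hadoop.mapred.JobConf;

    public class TaskLogDiagnosis {
      public static void printDiagnosis(JobConf conf, Iterable<String> logUrls) {
        TaskLogProcessor tlp = new TaskLogProcessor(conf);
        for (String url : logUrls) {
          tlp.addTaskAttemptLogUrl(url);
        }
        // Runs every registered heuristic over every log and returns the
        // ErrorAndSolution(s) of the most frequently triggered heuristic(s).
        for (ErrorAndSolution e : tlp.getErrors()) {
          System.out.println("Possible error:\n  " + e.getError());
          System.out.println("Solution:\n  " + e.getSolution());
        }
      }
    }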

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/package-info.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/package-info.java?rev=916495&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/package-info.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/package-info.java Thu Feb 25 23:42:37 2010
@@ -0,0 +1,20 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Generates execution error messages using heuristics. */
+package org.apache.hadoop.hive.ql.exec.errors;