Posted to commits@hive.apache.org by zs...@apache.org on 2010/02/26 00:42:37 UTC
svn commit: r916495 - in /hadoop/hive/trunk: ./
ql/src/java/org/apache/hadoop/hive/ql/exec/
ql/src/java/org/apache/hadoop/hive/ql/exec/errors/
Author: zshao
Date: Thu Feb 25 23:42:37 2010
New Revision: 916495
URL: http://svn.apache.org/viewvc?rev=916495&view=rev
Log:
HIVE-1032. Better Error Messages for Execution Errors. (Paul Yang via zshao)
Added:
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/DataCorruptErrorHeuristic.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ErrorAndSolution.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ErrorHeuristic.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/MapAggrMemErrorHeuristic.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/RegexErrorHeuristic.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ScriptErrorHeuristic.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/package-info.java
Modified:
hadoop/hive/trunk/CHANGES.txt
hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java
Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=916495&r1=916494&r2=916495&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Thu Feb 25 23:42:37 2010
@@ -33,6 +33,9 @@
HIVE-1188. NPE when TestJdbcDriver/TestHiveServer
(Carl Steinbach via Ning Zhang)
+ HIVE-1032. Better Error Messages for Execution Errors.
+ (Paul Yang via zshao)
+
IMPROVEMENTS
HIVE-983. Function from_unixtime takes long.
(Ning Zhang via zshao)
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java?rev=916495&r1=916494&r2=916495&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java Thu Feb 25 23:42:37 2010
@@ -52,6 +52,8 @@
import org.apache.hadoop.hive.ql.DriverContext;
import org.apache.hadoop.hive.ql.QueryPlan;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
+import org.apache.hadoop.hive.ql.exec.errors.ErrorAndSolution;
+import org.apache.hadoop.hive.ql.exec.errors.TaskLogProcessor;
import org.apache.hadoop.hive.ql.history.HiveHistory.Keys;
import org.apache.hadoop.hive.ql.io.HiveKey;
import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
@@ -70,11 +72,11 @@
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.Partitioner;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.TaskCompletionEvent;
import org.apache.log4j.BasicConfigurator;
import org.apache.log4j.varia.NullAppender;
-import org.apache.hadoop.mapred.Partitioner;
/**
* ExecDriver.
@@ -104,7 +106,7 @@
SessionState ss = SessionState.get();
Set<String> files = (ss == null) ? null : ss.list_resource(t, null);
if (files != null) {
- ArrayList<String> realFiles = new ArrayList<String>(files.size());
+ List<String> realFiles = new ArrayList<String>(files.size());
for (String one : files) {
try {
realFiles.add(Utilities.realFile(one, conf));
@@ -259,6 +261,7 @@
rj = job;
}
+ @Override
public Counters getCounters() throws IOException {
return rj.getCounters();
}
@@ -294,6 +297,7 @@
}
}
+ @Override
public void progress(TaskHandle taskHandle) throws IOException {
ExecDriverTaskHandle th = (ExecDriverTaskHandle) taskHandle;
JobClient jc = th.getJobClient();
@@ -503,6 +507,7 @@
/**
* Execute a query plan using Hadoop.
*/
+ @Override
public int execute() {
success = true;
@@ -762,17 +767,47 @@
return "Ended Job = " + jobId;
}
- private void showJobFailDebugInfo(JobConf conf, RunningJob rj) throws IOException {
+ private String getTaskAttemptLogUrl(String taskTrackerHttpAddress,
+ String taskAttemptId) {
+ return taskTrackerHttpAddress + "/tasklog?taskid=" + taskAttemptId + "&all=true";
+ }
+
+ // Used for showJobFailDebugInfo
+ private static class TaskInfo {
+ String jobId;
+ HashSet<String> logUrls;
+
+ public TaskInfo(String jobId) {
+ this.jobId = jobId;
+ logUrls = new HashSet<String>();
+ }
+ public void addLogUrl(String logUrl) {
+ logUrls.add(logUrl);
+ }
+ public HashSet<String> getLogUrls() {
+ return logUrls;
+ }
+ public String getJobId() {
+ return jobId;
+ }
+ }
+ @SuppressWarnings("deprecation")
+ private void showJobFailDebugInfo(JobConf conf, RunningJob rj) throws IOException {
+ // Mapping from task ID to the number of failures
Map<String, Integer> failures = new HashMap<String, Integer>();
+ // Successful task IDs
Set<String> successes = new HashSet<String>();
- Map<String, String> taskToJob = new HashMap<String, String>();
+
+ Map<String, TaskInfo> taskIdToInfo = new HashMap<String, TaskInfo>();
int startIndex = 0;
+ // Loop to get all task completion events because getTaskCompletionEvents
+ // only returns a subset per call
while (true) {
- TaskCompletionEvent[] taskCompletions = rj
- .getTaskCompletionEvents(startIndex);
+ TaskCompletionEvent[] taskCompletions =
+ rj.getTaskCompletionEvents(startIndex);
if (taskCompletions == null || taskCompletions.length == 0) {
break;
@@ -780,21 +815,35 @@
boolean more = true;
for (TaskCompletionEvent t : taskCompletions) {
- // getTaskJobIDs return Strings for compatibility with Hadoop version
- // without
- // TaskID or TaskAttemptID
+ // getTaskJobIDs returns Strings for compatibility with Hadoop versions
+ // without TaskID or TaskAttemptID
String[] taskJobIds = ShimLoader.getHadoopShims().getTaskJobIDs(t);
if (taskJobIds == null) {
- console
- .printError("Task attempt info is unavailable in this Hadoop version");
+ console.printError("Task attempt info is unavailable in " +
+ "this Hadoop version");
more = false;
break;
}
+ // For each task completion event, get the associated task id, job id
+ // and the logs
String taskId = taskJobIds[0];
String jobId = taskJobIds[1];
- taskToJob.put(taskId, jobId);
+
+ TaskInfo ti = taskIdToInfo.get(taskId);
+ if(ti == null) {
+ ti = new TaskInfo(jobId);
+ taskIdToInfo.put(taskId, ti);
+ }
+ // These tasks should have come from the same job.
+ assert(ti.getJobId() == jobId);
+ ti.getLogUrls().add(
+ getTaskAttemptLogUrl(t.getTaskTrackerHttp(), t.getTaskId()));
+
+ // If a task failed, then keep track of the total number of failures
+ // for that task (typically, a task gets re-run up to 4 times if it
+ // fails).
if (t.getTaskStatus() != TaskCompletionEvent.Status.SUCCEEDED) {
Integer failAttempts = failures.get(taskId);
@@ -830,16 +879,42 @@
}
// Display Error Message for tasks with the highest failure count
- console.printError("\nFailed tasks with most" + "(" + maxFailures + ")"
- + " failures " + ": ");
String jtUrl = JobTrackerURLResolver.getURL(conf);
for (String task : failures.keySet()) {
if (failures.get(task).intValue() == maxFailures) {
- String jobId = taskToJob.get(task);
- String taskUrl = jtUrl + "/taskdetails.jsp?jobid=" + jobId + "&tipid="
- + task.toString();
- console.printError("Task URL: " + taskUrl + "\n");
+ TaskInfo ti = taskIdToInfo.get(task);
+ String jobId = ti.getJobId();
+ String taskUrl = jtUrl + "/taskdetails.jsp?jobid=" + jobId + "&tipid=" +
+ task.toString();
+
+ TaskLogProcessor tlp = new TaskLogProcessor(conf);
+ for(String logUrl : ti.getLogUrls()) {
+ tlp.addTaskAttemptLogUrl(logUrl);
+ }
+
+ List<ErrorAndSolution> errors = tlp.getErrors();
+
+ StringBuilder sb = new StringBuilder();
+ // We use a StringBuilder and then call printError only once as
+ // printError will write to both stderr and the error log file. In
+ // situations where both the stderr and the log file output is
+ // simultaneously output to a single stream, this will look cleaner.
+ sb.append("\n");
+ sb.append("Task with the most failures(" + maxFailures + "): \n");
+ sb.append("-----\n");
+ sb.append("Task ID:\n " + task + "\n\n");
+ sb.append("URL:\n " + taskUrl + "\n");
+
+ for(ErrorAndSolution e : errors) {
+ sb.append("\n");
+ sb.append("Possible error:\n " + e.getError() + "\n\n");
+ sb.append("Solution:\n " + e.getSolution() + "\n");
+ }
+ sb.append("-----\n");
+
+ console.printError(sb.toString());
+
// Only print out one task because that's good enough for debugging.
break;
}
@@ -1165,6 +1240,7 @@
}
}
+ @Override
public int getType() {
return StageType.MAPRED;
}
Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/DataCorruptErrorHeuristic.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/DataCorruptErrorHeuristic.java?rev=916495&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/DataCorruptErrorHeuristic.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/DataCorruptErrorHeuristic.java Thu Feb 25 23:42:37 2010
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.errors;
+
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Detects the condition where there is an error with one of the input files in
+ * the query.
+ *
+ * Conditions to check:
+ * 1. EOFException in log
+ * 2. A line indicating the split file in the log. This is needed to
+ * generate the proper error message.
+ *
+ */
+
+public class DataCorruptErrorHeuristic extends RegexErrorHeuristic {
+
+ private static final String SPLIT_REGEX = "split:.*";
+ private static final String EXCEPTION_REGEX = "EOFException";
+
+ public DataCorruptErrorHeuristic() {
+ setQueryRegex(".*");
+ getLogRegexes().add(SPLIT_REGEX);
+ getLogRegexes().add(EXCEPTION_REGEX);
+ }
+
+ @Override
+ public ErrorAndSolution getErrorAndSolution() {
+ ErrorAndSolution es = null;
+
+ if(getQueryMatches()) {
+ Map<String, List<String>> rll = getRegexToLogLines();
+ if (rll.get(EXCEPTION_REGEX).size() > 0 &&
+ rll.get(SPLIT_REGEX).size() > 0) {
+
+ // There should only be a single split line...
+ assert(rll.get(SPLIT_REGEX).size()==1);
+ String splitLogLine = rll.get(SPLIT_REGEX).get(0);
+
+ // Extract only 'split: hdfs://...'
+ Pattern p = Pattern.compile(SPLIT_REGEX, Pattern.CASE_INSENSITIVE);
+ Matcher m = p.matcher(splitLogLine);
+ m.find();
+ String splitStr = m.group();
+
+ es = new ErrorAndSolution(
+ "Data file " + splitStr + " is corrupted.",
+ "Replace file. i.e. by re-running the query that produced the " +
+ "source table / partition.");
+ }
+ }
+ reset();
+ return es;
+ }
+}
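
For context, this heuristic only fires when a failed attempt's log contains both a line matching SPLIT_REGEX and a line matching EXCEPTION_REGEX. Hypothetical matching lines (illustrative only, not taken from this commit) would look like:

    ... processing split: hdfs://namenode/user/hive/warehouse/src/part-00000 ...
    java.io.EOFException

The reported error then names the extracted 'split: hdfs://...' substring as the corrupted file.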
Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ErrorAndSolution.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ErrorAndSolution.java?rev=916495&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ErrorAndSolution.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ErrorAndSolution.java Thu Feb 25 23:42:37 2010
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.errors;
+
+/**
+ * Immutable class for storing a possible error and a resolution suggestion.
+ */
+public class ErrorAndSolution {
+
+ private String error = null;
+ private String solution = null;
+
+ ErrorAndSolution(String error, String solution) {
+ this.error = error;
+ this.solution = solution;
+ }
+
+ public String getError() {
+ return error;
+ }
+
+ public String getSolution() {
+ return solution;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (!(o instanceof ErrorAndSolution)) {
+ return false;
+ }
+ ErrorAndSolution e = (ErrorAndSolution)o;
+
+ return e.error == this.error && e.solution == this.solution;
+ }
+
+ @Override
+ public int hashCode() {
+ return error.hashCode() * 37 + solution.hashCode();
+ }
+}
Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ErrorHeuristic.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ErrorHeuristic.java?rev=916495&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ErrorHeuristic.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ErrorHeuristic.java Thu Feb 25 23:42:37 2010
@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.errors;
+
+import org.apache.hadoop.mapred.JobConf;
+
+/**
+ * Classes implementing ErrorHeuristic are able to generate a possible cause and
+ * solution for Hive jobs that have failed by examining the query, task log
+ * files, and the job configuration.
+ *
+ * A class implementing ErrorHeuristic should only detect one type of error.
+ *
+ */
+public interface ErrorHeuristic {
+
+ /**
+ * Initialize this error heuristic. Must be called before any other methods
+ * are called.
+ * @param query
+ * @param jobConf
+ */
+ void init(String query, JobConf jobConf);
+
+ /**
+ * Process the given log line. It should be called for every line in the task
+ * log file, in sequence.
+ *
+ * @param line
+ */
+ void processLogLine(String line);
+
+ /**
+ * Examine the Hive query, job configuration, and the lines from the task log
+ * seen so far through processLogLine() and generate a possible cause/solution.
+ * Once this method is called, the implementing class should be reset to the
+ * state before any processLogLine() calls were made.
+ *
+ * @return a matching error, or null if a suitable match wasn't found.
+ *
+ */
+ ErrorAndSolution getErrorAndSolution();
+}
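
To illustrate the contract, a minimal heuristic implementing this interface directly (rather than via RegexErrorHeuristic below) might look like the following sketch. The class name, the matched string, and the suggested solution are illustrative only and are not part of this commit:

    package org.apache.hadoop.hive.ql.exec.errors;

    import org.apache.hadoop.mapred.JobConf;

    // Hypothetical example: flags logs that mention ClassNotFoundException.
    public class MissingClassErrorHeuristic implements ErrorHeuristic {

      private boolean sawMissingClass = false;

      public void init(String query, JobConf jobConf) {
        // This simple sketch needs neither the query nor the configuration.
      }

      public void processLogLine(String line) {
        // Called once for every line of the task log, in order.
        if (line.contains("ClassNotFoundException")) {
          sawMissingClass = true;
        }
      }

      public ErrorAndSolution getErrorAndSolution() {
        ErrorAndSolution es = null;
        if (sawMissingClass) {
          es = new ErrorAndSolution(
              "A class needed by the job was not found on the cluster.",
              "Make sure all required jars are shipped with the job.");
        }
        // Per the interface contract, reset state before returning.
        sawMissingClass = false;
        return es;
      }
    }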
Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/MapAggrMemErrorHeuristic.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/MapAggrMemErrorHeuristic.java?rev=916495&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/MapAggrMemErrorHeuristic.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/MapAggrMemErrorHeuristic.java Thu Feb 25 23:42:37 2010
@@ -0,0 +1,73 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.errors;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.mapred.JobConf;
+
+/**
+ * Detects out-of-memory errors that occur when the hash tables used for
+ * map-side aggregation in group-by queries take up too much memory.
+ *
+ * Conditions to check
+ *
+ * 1. The query contains a group by.
+ * 2. Map-side aggregation is turned on.
+ * 3. There is an out-of-memory exception in the log.
+ */
+public class MapAggrMemErrorHeuristic extends RegexErrorHeuristic {
+
+ private static final String OUT_OF_MEMORY_REGEX = "OutOfMemoryError";
+ private boolean configMatches = false;
+
+ public MapAggrMemErrorHeuristic() {
+ setQueryRegex("group by");
+ getLogRegexes().add(OUT_OF_MEMORY_REGEX);
+ }
+
+ @Override
+ public void init(String query, JobConf conf) {
+ super.init(query, conf);
+ configMatches = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEMAPSIDEAGGREGATE);
+ }
+
+ @Override
+ public ErrorAndSolution getErrorAndSolution() {
+ ErrorAndSolution es = null;
+ if(getQueryMatches() && configMatches) {
+ List<String> matchingLines = getRegexToLogLines().get(OUT_OF_MEMORY_REGEX);
+
+ if (matchingLines.size() > 0) {
+ String confName = HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY.toString();
+ float confValue = HiveConf.getFloatVar(getConf(),
+ HiveConf.ConfVars.HIVEMAPAGGRHASHMEMORY);
+
+ es = new ErrorAndSolution(
+ "Out of memory due to hash maps used in map-side aggregation.",
+ "Currently " + confName + " is set to " + confValue + ". " +
+ "Try setting it to a lower value. i.e " +
+ "'set " + confName + " = " + confValue/2 + ";'");
+ }
+ }
+ reset();
+ return es;
+ }
+}
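
The solution string built above steers the user toward lowering the map-side aggregation hash memory. Assuming HIVEMAPAGGRHASHMEMORY corresponds to the hive.map.aggr.hash.percentmemory property with a default of 0.5 (that mapping is not shown in this diff), the emitted advice would read roughly:

    set hive.map.aggr.hash.percentmemory = 0.25;

i.e. confValue/2 as computed in getErrorAndSolution().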
Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/RegexErrorHeuristic.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/RegexErrorHeuristic.java?rev=916495&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/RegexErrorHeuristic.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/RegexErrorHeuristic.java Thu Feb 25 23:42:37 2010
@@ -0,0 +1,129 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.errors;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.Map.Entry;
+import java.util.regex.Pattern;
+
+import org.apache.hadoop.mapred.JobConf;
+
+/**
+ * Simple heuristic where the query and the lines of the task log file are run
+ * through regular expressions to see if they resemble a known error condition.
+ *
+ * Only a single regular expression can be supplied to match the query whereas
+ * multiple regular expressions can be supplied to match lines from the log file.
+ * A mapping is maintained from the regular expression to the lines from the log
+ * file that it matched.
+ */
+public abstract class RegexErrorHeuristic implements ErrorHeuristic {
+
+ private String query = null;
+ private JobConf conf = null;
+
+ // Pattern to look for in the hive query and whether it matched
+ private String queryRegex = null;
+ private boolean queryMatches = false;
+
+ // The regexes to look for in the log files
+ private final Set<String> logRegexes = new HashSet<String>();
+
+ // Mapping from the regex to lines in the log file where find() == true
+ private final Map<String, List<String>> regexToLogLines = new HashMap<String, List<String>>();
+ private final Map<String, Pattern> regexToPattern = new HashMap<String, Pattern>();
+
+ public RegexErrorHeuristic() {
+ }
+
+ protected void setQueryRegex(String queryRegex) {
+ this.queryRegex = queryRegex;
+ }
+
+ protected String getQueryRegex() {
+ return queryRegex;
+ }
+
+ protected boolean getQueryMatches() {
+ return queryMatches;
+ }
+
+ protected Set<String> getLogRegexes() {
+ return this.logRegexes;
+ }
+
+ protected Map<String, List<String>> getRegexToLogLines() {
+ return this.regexToLogLines;
+ }
+
+ protected JobConf getConf() {
+ return conf;
+ }
+
+ @Override
+ /**
+ * Before init is called, logRegexes and queryRegex should be populated.
+ */
+ public void init(String query, JobConf conf) {
+ this.query = query;
+ this.conf = conf;
+
+ assert((logRegexes!=null) && (queryRegex != null));
+
+ Pattern queryPattern = Pattern.compile(queryRegex, Pattern.CASE_INSENSITIVE);
+ queryMatches = queryPattern.matcher(query).find();
+
+ for(String regex : logRegexes) {
+ regexToPattern.put(regex, Pattern.compile(regex, Pattern.CASE_INSENSITIVE));
+ regexToLogLines.put(regex, new ArrayList<String>());
+ }
+
+ }
+
+ @Override
+ public abstract ErrorAndSolution getErrorAndSolution();
+
+ @Override
+ public void processLogLine(String line) {
+ if(queryMatches) {
+ for(Entry<String, Pattern> e : regexToPattern.entrySet()) {
+ String regex = e.getKey();
+ Pattern p = e.getValue();
+ boolean lineMatches = p.matcher(line).find();
+ if(lineMatches) {
+ regexToLogLines.get(regex).add(line);
+ }
+ }
+ }
+ }
+
+ /**
+ * Resets to state before any processLogLine() calls.
+ */
+ protected void reset() {
+ for(List<String> lst : regexToLogLines.values()) {
+ lst.clear();
+ }
+ }
+}
Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ScriptErrorHeuristic.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ScriptErrorHeuristic.java?rev=916495&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ScriptErrorHeuristic.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/ScriptErrorHeuristic.java Thu Feb 25 23:42:37 2010
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.errors;
+
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Detects when a query has failed because a user's script specified in a
+ * TRANSFORM clause returned a non-zero error code.
+ *
+ * Conditions to check:
+ *
+ * 1. "Script failed with code <some number>" is in the log
+ *
+ */
+
+public class ScriptErrorHeuristic extends RegexErrorHeuristic {
+
+ private static final String FAILED_REGEX = "Script failed with code [0-9]+";
+
+ public ScriptErrorHeuristic() {
+ setQueryRegex(".*");
+ getLogRegexes().add(FAILED_REGEX);
+ }
+
+ @Override
+ public ErrorAndSolution getErrorAndSolution() {
+ ErrorAndSolution es = null;
+
+ if(getQueryMatches()) {
+ for(List<String> matchingLines : getRegexToLogLines().values()) {
+ // There should really only be one line with "Script failed..."
+ if (matchingLines.size() > 0) {
+ assert(matchingLines.size() == 1);
+
+ // Get "Script failed with code <some number>"
+ Matcher m1 = Pattern.compile(FAILED_REGEX).matcher(matchingLines.get(0));
+ m1.find();
+ String failedStr = m1.group();
+
+ // Get "<some number>"
+ Matcher m2 = Pattern.compile("[0-9]+").matcher(failedStr);
+ m2.find();
+ String errorCode = m2.group();
+
+ es = new ErrorAndSolution(
+ "A user-supplied transfrom script has exited with error code " +
+ errorCode + " instead of 0.",
+ "Verify that the script can properly handle all the input rows " +
+ "without throwing exceptions and exits properly.");
+ }
+ }
+ }
+
+ reset();
+ return es;
+ }
+}
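
As an illustration, a failed attempt whose log contains a line such as the hypothetical one below:

    ... Script failed with code 2 ...

matches FAILED_REGEX; the heuristic then extracts the numeric code (2) with the second pattern and reports it in the error message.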
Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java?rev=916495&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/TaskLogProcessor.java Thu Feb 25 23:42:37 2010
@@ -0,0 +1,173 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.errors;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.mapred.JobConf;
+
+/**
+ * TaskLogProcessor reads the logs from failed task attempts and tries to figure
+ * out what the cause of the error was using various heuristics.
+ */
+public class TaskLogProcessor {
+
+ private final Map<ErrorHeuristic, HeuristicStats> heuristics =
+ new HashMap<ErrorHeuristic, HeuristicStats>();
+ private final List<String> taskLogUrls = new ArrayList<String>();
+
+ private JobConf conf = null;
+ // Query is the Hive query string, e.g. "SELECT * FROM src;", associated with
+ // this set of task logs
+ private String query = null;
+
+ public TaskLogProcessor(JobConf conf) {
+ this.conf = conf;
+ query = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYSTRING);
+
+ heuristics.put(new ScriptErrorHeuristic(), new HeuristicStats());
+ heuristics.put(new MapAggrMemErrorHeuristic(), new HeuristicStats());
+ heuristics.put(new DataCorruptErrorHeuristic(), new HeuristicStats());
+ for(ErrorHeuristic e : heuristics.keySet()) {
+ e.init(query, conf);
+ }
+ }
+
+ /**
+ * Adds a task log URL for the heuristics to read through.
+ * @param url
+ */
+ public void addTaskAttemptLogUrl(String url) {
+ taskLogUrls.add(url);
+ }
+
+ private static class HeuristicStats {
+
+ // The number of times the heuristic has returned non-null errors
+ private int triggerCount = 0;
+ // All ErrorAndSolutions that the heuristic has generated. For the same error,
+ // they should be the same, though it's possible that different file paths,
+ // etc., could generate different error messages.
+ private final List<ErrorAndSolution> ens = new ArrayList<ErrorAndSolution>();
+
+ HeuristicStats() {
+ }
+
+ int getTriggerCount() {
+ return triggerCount;
+ }
+
+ void incTriggerCount() {
+ triggerCount++;
+ }
+
+ List<ErrorAndSolution> getErrorAndSolutions() {
+ return ens;
+ }
+
+ void addErrorAndSolution(ErrorAndSolution e) {
+ ens.add(e);
+ }
+ }
+
+ /**
+ * Processes the provided task logs using the known error heuristics to get
+ * the matching errors.
+ * @return An ErrorAndSolution from the ErrorHeuristic that most frequently
+ * generated matches. In case of ties, multiple ErrorAndSolutions will be
+ * returned.
+ */
+ public List<ErrorAndSolution> getErrors() {
+
+ for(String urlString : taskLogUrls) {
+
+ // Open the log file, and read in a line. Then feed the line into
+ // each of the ErrorHeuristics. Repeat for all the lines in the log.
+ URL taskAttemptLogUrl;
+ try {
+ taskAttemptLogUrl = new URL(urlString);
+ } catch(MalformedURLException e) {
+ throw new RuntimeException("Bad task log url", e);
+ }
+ BufferedReader in;
+ try {
+ in = new BufferedReader(
+ new InputStreamReader(taskAttemptLogUrl.openStream()));
+ String inputLine;
+ while ((inputLine = in.readLine()) != null) {
+ for(ErrorHeuristic e : heuristics.keySet()) {
+ e.processLogLine(inputLine);
+ }
+ }
+ in.close();
+ } catch (IOException e) {
+ throw new RuntimeException("Error while reading from task log url", e);
+ }
+
+ // Once the lines of the log file have been fed into the ErrorHeuristics,
+ // see if they have detected anything. If any has, record
+ // what ErrorAndSolution it gave so we can later return the most
+ // frequently occurring error
+ for(Entry<ErrorHeuristic, HeuristicStats> ent : heuristics.entrySet()) {
+ ErrorHeuristic eh = ent.getKey();
+ HeuristicStats hs = ent.getValue();
+
+ ErrorAndSolution es = eh.getErrorAndSolution();
+ if(es != null) {
+ hs.incTriggerCount();
+ hs.addErrorAndSolution(es);
+ }
+ }
+
+ }
+
+ // Return the errors that occur the most frequently
+ int max = 0;
+ for(HeuristicStats hs : heuristics.values()) {
+ if(hs.getTriggerCount() > max) {
+ max = hs.getTriggerCount();
+ }
+ }
+
+ List<ErrorAndSolution> errors = new ArrayList<ErrorAndSolution>();
+ for(HeuristicStats hs : heuristics.values()) {
+ if(hs.getTriggerCount() == max) {
+ if(hs.getErrorAndSolutions().size() > 0) {
+ // An error heuristic could have generated different ErrorAndSolution
+ // for each task attempt, but most likely they are the same. Plus,
+ // one of those is probably good enough for debugging
+ errors.add(hs.getErrorAndSolutions().get(0));
+ }
+ }
+ }
+
+ return errors;
+ }
+
+}
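
Putting the pieces together, the intended call pattern (mirroring showJobFailDebugInfo in ExecDriver above) is roughly the sketch below; the tracker host and attempt IDs are placeholders:

    // conf is the JobConf of the failed Hadoop job.
    TaskLogProcessor tlp = new TaskLogProcessor(conf);

    // Register the log URLs of the failed task's attempts.
    tlp.addTaskAttemptLogUrl(
        "http://tracker:50060/tasklog?taskid=attempt_201002250001_0001_m_000000_0&all=true");
    tlp.addTaskAttemptLogUrl(
        "http://tracker:50060/tasklog?taskid=attempt_201002250001_0001_m_000000_1&all=true");

    // Each heuristic scans every log; the most frequently triggered
    // diagnosis (or several, in case of a tie) comes back here.
    for (ErrorAndSolution e : tlp.getErrors()) {
      System.err.println("Possible error:\n  " + e.getError());
      System.err.println("Solution:\n  " + e.getSolution());
    }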
Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/package-info.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/package-info.java?rev=916495&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/package-info.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/errors/package-info.java Thu Feb 25 23:42:37 2010
@@ -0,0 +1,20 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Generates execution error messages using heuristics. */
+package org.apache.hadoop.hive.ql.exec.errors;