You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by om...@apache.org on 2011/03/08 06:59:34 UTC
svn commit: r1079242 - in /hadoop/mapreduce/branches/yahoo-merge/src:
test/mapred/org/apache/hadoop/tools/rumen/
tools/org/apache/hadoop/tools/rumen/
Author: omalley
Date: Tue Mar 8 05:59:34 2011
New Revision: 1079242
URL: http://svn.apache.org/viewvc?rev=1079242&view=rev
Log:
commit bec8eb5a48487d977a1a0a746a09e436ca3117c6
Author: Ravi Gummadi <gr...@yahoo-inc.com>
Date: Wed Jan 12 00:44:58 2011 +0530
: Fix Rumen TraceBuilder to not skip analyzing
(a) y-trunk jobhistory conf file and (b) latest FRED history files.
Patch is available at
(gravi)
Added:
hadoop/mapreduce/branches/yahoo-merge/src/tools/org/apache/hadoop/tools/rumen/JobHistoryUtils.java
Modified:
hadoop/mapreduce/branches/yahoo-merge/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java
hadoop/mapreduce/branches/yahoo-merge/src/tools/org/apache/hadoop/tools/rumen/Pre21JobHistoryConstants.java
hadoop/mapreduce/branches/yahoo-merge/src/tools/org/apache/hadoop/tools/rumen/TraceBuilder.java
Modified: hadoop/mapreduce/branches/yahoo-merge/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/yahoo-merge/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java?rev=1079242&r1=1079241&r2=1079242&view=diff
==============================================================================
--- hadoop/mapreduce/branches/yahoo-merge/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java (original)
+++ hadoop/mapreduce/branches/yahoo-merge/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java Tue Mar 8 05:59:34 2011
@@ -247,8 +247,57 @@ public class TestRumenJobTraces {
}
/**
- * Tests if {@link TraceBuilder} can correctly identify and parse jobhistory
- * filenames. The testcase checks if {@link TraceBuilder}
+ * Validate the parsing of given history file name. Also validate the history
+ * file name suffixed with old/stale file suffix.
+ * @param jhFileName job history file path
+ * @param jid JobID
+ */
+ private void validateHistoryFileNameParsing(Path jhFileName,
+ org.apache.hadoop.mapred.JobID jid) {
+ JobID extractedJID =
+ JobID.forName(JobHistoryUtils.extractJobID(jhFileName.getName()));
+ assertEquals("TraceBuilder failed to parse the current JH filename"
+ + jhFileName, jid, extractedJID);
+ // Repeat the check after appending the old/stale file suffix to the name
+ jhFileName = jhFileName.suffix(JobHistory.getOldFileSuffix("123"));
+ extractedJID =
+ JobID.forName(JobHistoryUtils.extractJobID(jhFileName.getName()));
+ assertEquals("TraceBuilder failed to parse the current JH filename"
+ + "(old-suffix):" + jhFileName,
+ jid, extractedJID);
+ }
+
+ /**
+ * Validates that the given history conf file name is recognized as a job conf
+ * xml file and yields the expected job ID, with and without the old/stale suffix.
+ * @param jhConfFileName job history conf file path
+ * @param jid the JobID expected to be extracted from the file name
+ */
+ private void validateJHConfFileNameParsing(Path jhConfFileName,
+ org.apache.hadoop.mapred.JobID jid) {
+ assertTrue("TraceBuilder failed to parse the JH conf filename:"
+ + jhConfFileName,
+ JobHistoryUtils.isJobConfXml(jhConfFileName.getName()));
+ JobID extractedJID =
+ JobID.forName(JobHistoryUtils.extractJobID(jhConfFileName.getName()));
+ assertEquals("TraceBuilder failed to parse the current JH conf filename:"
+ + jhConfFileName, jid, extractedJID);
+ // Repeat both checks after appending the old/stale file suffix to the name
+ jhConfFileName = jhConfFileName.suffix(JobHistory.getOldFileSuffix("123"));
+ assertTrue("TraceBuilder failed to parse the current JH conf filename"
+ + " (old suffix):" + jhConfFileName,
+ JobHistoryUtils.isJobConfXml(jhConfFileName.getName()));
+ extractedJID =
+ JobID.forName(JobHistoryUtils.extractJobID(jhConfFileName.getName()));
+ assertEquals("TraceBuilder failed to parse the JH conf filename"
+ + "(old-suffix):" + jhConfFileName,
+ jid, extractedJID);
+ }
+
+ /**
+ * Tests if {@link TraceBuilder} can correctly identify and parse different
+ * versions of jobhistory filenames. The testcase checks if
+ * {@link TraceBuilder}
* - correctly identifies a jobhistory filename without suffix
* - correctly parses a jobhistory filename without suffix to extract out
* the jobid
@@ -268,30 +317,30 @@ public class TestRumenJobTraces {
final Path rootInputDir =
new Path(System.getProperty("test.tools.input.dir", ""))
.makeQualified(lfs.getUri(), lfs.getWorkingDirectory());
-
- // Check if jobhistory filename are detected properly
+
+ // Check if current jobhistory filenames are detected properly
Path jhFilename = JobHistory.getJobHistoryFile(rootInputDir, jid);
- JobID extractedJID =
- JobID.forName(TraceBuilder.extractJobID(jhFilename.getName()));
- assertEquals("TraceBuilder failed to parse the current JH filename",
- jid, extractedJID);
- // test jobhistory filename with old/stale file suffix
- jhFilename = jhFilename.suffix(JobHistory.getOldFileSuffix("123"));
- extractedJID =
- JobID.forName(TraceBuilder.extractJobID(jhFilename.getName()));
- assertEquals("TraceBuilder failed to parse the current JH filename"
- + "(old-suffix)",
- jid, extractedJID);
-
- // Check if the conf filename in jobhistory are detected properly
+ validateHistoryFileNameParsing(jhFilename, jid);
+
+ // Check if Pre21 V1 jobhistory file names are detected properly
+ jhFilename = new Path("jt-identifier_" + jid + "_user-name_job-name");
+ validateHistoryFileNameParsing(jhFilename, jid);
+
+ // Check if Pre21 V2 jobhistory file names are detected properly
+ jhFilename = new Path(jid + "_user-name_job-name");
+ validateHistoryFileNameParsing(jhFilename, jid);
+
+ // Check if the current jobhistory conf filenames are detected properly
Path jhConfFilename = JobHistory.getConfFile(rootInputDir, jid);
- assertTrue("TraceBuilder failed to parse the current JH conf filename",
- TraceBuilder.isJobConfXml(jhConfFilename.getName(), null));
- // test jobhistory conf filename with old/stale file suffix
- jhConfFilename = jhConfFilename.suffix(JobHistory.getOldFileSuffix("123"));
- assertTrue("TraceBuilder failed to parse the current JH conf filename"
- + " (old suffix)",
- TraceBuilder.isJobConfXml(jhConfFilename.getName(), null));
+ validateJHConfFileNameParsing(jhConfFilename, jid);
+
+ // Check if Pre21 V1 jobhistory conf file names are detected properly
+ jhConfFilename = new Path("jt-identifier_" + jid + "_conf.xml");
+ validateJHConfFileNameParsing(jhConfFilename, jid);
+
+ // Check if Pre21 V2 jobhistory conf file names are detected properly
+ jhConfFilename = new Path(jid + "_conf.xml");
+ validateJHConfFileNameParsing(jhConfFilename, jid);
}
/**
Added: hadoop/mapreduce/branches/yahoo-merge/src/tools/org/apache/hadoop/tools/rumen/JobHistoryUtils.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/yahoo-merge/src/tools/org/apache/hadoop/tools/rumen/JobHistoryUtils.java?rev=1079242&view=auto
==============================================================================
--- hadoop/mapreduce/branches/yahoo-merge/src/tools/org/apache/hadoop/tools/rumen/JobHistoryUtils.java (added)
+++ hadoop/mapreduce/branches/yahoo-merge/src/tools/org/apache/hadoop/tools/rumen/JobHistoryUtils.java Tue Mar 8 05:59:34 2011
@@ -0,0 +1,115 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.tools.rumen;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.hadoop.mapreduce.JobID;
+import org.apache.hadoop.mapreduce.jobhistory.JobHistory;
+
+/**
+ * Job History related utils for handling multiple formats of history logs of
+ * different hadoop versions like Pre21 history logs, current history logs.
+ */
+public class JobHistoryUtils {
+ // Returns capture group 1 of pattern if fileName fully matches it, else null.
+ private static String applyParser(String fileName, Pattern pattern) {
+ Matcher matcher = pattern.matcher(fileName);
+
+ if (!matcher.matches()) {
+ return null;
+ }
+
+ return matcher.group(1);
+ }
+
+ /**
+ * Extracts jobID string from the given job history log file name or
+ * job history configuration file name.
+ * @param fileName name of job history file or job history configuration file
+ * @return a valid jobID String, parsed out of the file name. Otherwise,
+ * [especially for .crc files] returns null.
+ */
+ static String extractJobID(String fileName) {
+ // Get jobID if fileName is a config file name.
+ String jobId = extractJobIDFromConfFileName(fileName);
+ if (jobId == null) {
+ // Get JobID if fileName is a job history file name
+ jobId = extractJobIDFromHistoryFileName(fileName);
+ }
+ return jobId;
+ }
+
+ /**
+ * Extracts jobID string from the given job history file name.
+ * @param fileName name of the job history file
+ * @return JobID if the given <code>fileName</code> is a valid job history
+ * file name, <code>null</code> otherwise.
+ */
+ private static String extractJobIDFromHistoryFileName(String fileName) {
+ // History file name could be in one of the following formats
+ // (1) old pre21 job history file name format
+ // (2) new pre21 job history file name format
+ // (3) current job history file name format i.e. 0.22
+ String pre21JobID = applyParser(fileName,
+ Pre21JobHistoryConstants.JOBHISTORY_FILENAME_REGEX_V1);
+ if (pre21JobID == null) {
+ pre21JobID = applyParser(fileName,
+ Pre21JobHistoryConstants.JOBHISTORY_FILENAME_REGEX_V2);
+ }
+ if (pre21JobID != null) {
+ return pre21JobID;
+ }
+ return applyParser(fileName, JobHistory.JOBHISTORY_FILENAME_REGEX);
+ }
+
+ /**
+ * Extracts jobID string from the given job conf xml file name.
+ * @param fileName name of the job conf xml file
+ * @return job id if the given <code>fileName</code> is a valid job conf xml
+ * file name, <code>null</code> otherwise.
+ */
+ private static String extractJobIDFromConfFileName(String fileName) {
+ // History conf file name could be in one of the following formats
+ // (1) old pre21 job history conf file name format
+ // (2) new pre21 job history conf file name format
+ // (3) current job history conf file name format i.e. 0.22
+ String pre21JobID = applyParser(fileName,
+ Pre21JobHistoryConstants.CONF_FILENAME_REGEX_V1);
+ if (pre21JobID == null) {
+ pre21JobID = applyParser(fileName,
+ Pre21JobHistoryConstants.CONF_FILENAME_REGEX_V2);
+ }
+ if (pre21JobID != null) {
+ return pre21JobID;
+ }
+ return applyParser(fileName, JobHistory.CONF_FILENAME_REGEX);
+ }
+
+ /**
+ * Checks if the given <code>fileName</code> is a valid job conf xml file name.
+ * @param fileName name of the file to be validated
+ * @return <code>true</code> if the given <code>fileName</code> is a valid
+ * job conf xml file name.
+ */
+ static boolean isJobConfXml(String fileName) {
+ String jobId = extractJobIDFromConfFileName(fileName);
+ return jobId != null;
+ }
+}
Modified: hadoop/mapreduce/branches/yahoo-merge/src/tools/org/apache/hadoop/tools/rumen/Pre21JobHistoryConstants.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/yahoo-merge/src/tools/org/apache/hadoop/tools/rumen/Pre21JobHistoryConstants.java?rev=1079242&r1=1079241&r2=1079242&view=diff
==============================================================================
--- hadoop/mapreduce/branches/yahoo-merge/src/tools/org/apache/hadoop/tools/rumen/Pre21JobHistoryConstants.java (original)
+++ hadoop/mapreduce/branches/yahoo-merge/src/tools/org/apache/hadoop/tools/rumen/Pre21JobHistoryConstants.java Tue Mar 8 05:59:34 2011
@@ -20,10 +20,10 @@ package org.apache.hadoop.tools.rumen;
import java.util.regex.Pattern;
import org.apache.hadoop.mapreduce.JobID;
+import org.apache.hadoop.mapreduce.jobhistory.JobHistory;
/**
- *
- *
+ * Job History related constants for Hadoop releases prior to 0.21
*/
public class Pre21JobHistoryConstants {
@@ -51,18 +51,31 @@ public class Pre21JobHistoryConstants {
}
/**
- * Pre21 regex for jobhistory filename
+ * Regex for Pre21 V1(old) jobhistory filename
* i.e jt-identifier_job-id_user-name_job-name
*/
- static final Pattern JOBHISTORY_FILENAME_REGEX =
+ static final Pattern JOBHISTORY_FILENAME_REGEX_V1 =
Pattern.compile("[^.].+_(" + JobID.JOBID_REGEX + ")_.+");
+ /**
+ * Regex for Pre21 V2(new) jobhistory filename
+ * i.e job-id_user-name_job-name
+ */
+ static final Pattern JOBHISTORY_FILENAME_REGEX_V2 =
+ Pattern.compile("(" + JobID.JOBID_REGEX + ")_.+");
/**
- * Pre21 regex for jobhistory conf filename
+ * Regex for Pre21 V1(old) jobhistory conf filename
* i.e jt-identifier_job-id_conf.xml
*/
- static final Pattern CONF_FILENAME_REGEX =
- Pattern.compile("[^.].+_(" + JobID.JOBID_REGEX
- + ")_conf.xml(?:\\.[0-9a-zA-Z]+)?");
+ static final Pattern CONF_FILENAME_REGEX_V1 =
+ Pattern.compile("[^.].+_(" + JobID.JOBID_REGEX + ")_conf.xml"
+ + JobHistory.OLD_FULL_SUFFIX_REGEX_STRING + "?");
+ /**
+ * Regex for Pre21 V2(new) jobhistory conf filename
+ * i.e job-id_conf.xml
+ */
+ static final Pattern CONF_FILENAME_REGEX_V2 =
+ Pattern.compile("(" + JobID.JOBID_REGEX + ")_conf.xml"
+ + JobHistory.OLD_FULL_SUFFIX_REGEX_STRING + "?");
}
Modified: hadoop/mapreduce/branches/yahoo-merge/src/tools/org/apache/hadoop/tools/rumen/TraceBuilder.java
URL: http://svn.apache.org/viewvc/hadoop/mapreduce/branches/yahoo-merge/src/tools/org/apache/hadoop/tools/rumen/TraceBuilder.java?rev=1079242&r1=1079241&r2=1079242&view=diff
==============================================================================
--- hadoop/mapreduce/branches/yahoo-merge/src/tools/org/apache/hadoop/tools/rumen/TraceBuilder.java (original)
+++ hadoop/mapreduce/branches/yahoo-merge/src/tools/org/apache/hadoop/tools/rumen/TraceBuilder.java Tue Mar 8 05:59:34 2011
@@ -154,40 +154,6 @@ public class TraceBuilder extends Config
}
}
- private static String applyParser(String fileName, Pattern pattern) {
- Matcher matcher = pattern.matcher(fileName);
-
- if (!matcher.matches()) {
- return null;
- }
-
- return matcher.group(1);
- }
-
- /**
- * @param fileName
- * @return the jobID String, parsed out of the file name. We return a valid
- * String for either a history log file or a config file. Otherwise,
- * [especially for .crc files] we return null.
- */
- static String extractJobID(String fileName) {
- String pre21JobID
- = applyParser(fileName,
- Pre21JobHistoryConstants.JOBHISTORY_FILENAME_REGEX);
- String jobId = applyParser(fileName, JobHistory.JOBHISTORY_FILENAME_REGEX);
- return jobId == null ? pre21JobID : jobId;
- }
-
- static boolean isJobConfXml(String fileName, InputStream input) {
- String jobId = applyParser(fileName, JobHistory.CONF_FILENAME_REGEX);
- if (jobId == null) {
- // check if its a pre21 jobhistory conf file
- jobId = applyParser(fileName,
- Pre21JobHistoryConstants.CONF_FILENAME_REGEX);
- }
- return jobId != null;
- }
-
@SuppressWarnings("unchecked")
@Override
public int run(String[] args) throws Exception {
@@ -221,7 +187,7 @@ public class TraceBuilder extends Config
JobHistoryParser parser = null;
try {
- String jobID = extractJobID(filePair.first());
+ String jobID = JobHistoryUtils.extractJobID(filePair.first());
if (jobID == null) {
LOG.warn("File skipped: Invalid file name: "
+ filePair.first());
@@ -235,7 +201,7 @@ public class TraceBuilder extends Config
jobBuilder = new JobBuilder(jobID);
}
- if (isJobConfXml(filePair.first(), ris)) {
+ if (JobHistoryUtils.isJobConfXml(filePair.first())) {
processJobConf(JobConfigurationParser.parse(ris.rewind()), jobBuilder);
} else {
parser = JobHistoryParserFactory.getParser(ris);