You are viewing a plain text version of this content. The canonical link for it is here.
Posted to mapreduce-commits@hadoop.apache.org by ra...@apache.org on 2011/10/12 11:58:14 UTC

svn commit: r1182293 - in /hadoop/common/trunk/hadoop-mapreduce-project: ./ src/test/mapred/org/apache/hadoop/tools/rumen/ src/tools/org/apache/hadoop/tools/rumen/

Author: ravigummadi
Date: Wed Oct 12 09:58:14 2011
New Revision: 1182293

URL: http://svn.apache.org/viewvc?rev=1182293&view=rev
Log:
MAPREDUCE-3157. [Rumen] Fix TraceBuilder to handle 0.20 history file names also.

Added:
    hadoop/common/trunk/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/JobHistoryUtils.java
Modified:
    hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt
    hadoop/common/trunk/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java
    hadoop/common/trunk/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/Pre21JobHistoryConstants.java
    hadoop/common/trunk/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/TraceBuilder.java

Modified: hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt?rev=1182293&r1=1182292&r2=1182293&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/CHANGES.txt Wed Oct 12 09:58:14 2011
@@ -24,6 +24,9 @@ Trunk (unreleased changes)
 
   BUG FIXES
 
+    MAPREDUCE-3157. [Rumen] Fix TraceBuilder to handle 0.20 history file
+    names also. (Ravi Gummadi)
+
     MAPREDUCE-2950. [Gridmix] TestUserResolve fails in trunk. 
                     (Ravi Gummadi via amarrk)
 

Modified: hadoop/common/trunk/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java?rev=1182293&r1=1182292&r2=1182293&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/src/test/mapred/org/apache/hadoop/tools/rumen/TestRumenJobTraces.java Wed Oct 12 09:58:14 2011
@@ -246,8 +246,57 @@ public class TestRumenJobTraces {
   }
 
   /**
-   * Tests if {@link TraceBuilder} can correctly identify and parse jobhistory
-   * filenames. The testcase checks if {@link TraceBuilder}
+   * Validate the parsing of the given history file name. Also validate the
+   * history file name suffixed with the old/stale file suffix.
+   * @param jhFileName job history file path
+   * @param jid JobID
+   */
+  private void validateHistoryFileNameParsing(Path jhFileName,
+      org.apache.hadoop.mapred.JobID jid) {
+    JobID extractedJID =
+      JobID.forName(JobHistoryUtils.extractJobID(jhFileName.getName()));
+    assertEquals("TraceBuilder failed to parse the current JH filename"
+                 + jhFileName, jid, extractedJID);
+    // test jobhistory filename with old/stale file suffix
+    jhFileName = jhFileName.suffix(JobHistory.getOldFileSuffix("123"));
+    extractedJID =
+      JobID.forName(JobHistoryUtils.extractJobID(jhFileName.getName()));
+    assertEquals("TraceBuilder failed to parse the current JH filename"
+                 + "(old-suffix):" + jhFileName,
+                 jid, extractedJID);
+  }
+
+  /**
+   * Validate the parsing of the given history conf file name. Also validate
+   * the history conf file name suffixed with the old/stale file suffix.
+   * @param jhConfFileName job history conf file path
+   * @param jid JobID
+   */
+  private void validateJHConfFileNameParsing(Path jhConfFileName,
+      org.apache.hadoop.mapred.JobID jid) {
+    assertTrue("TraceBuilder failed to parse the JH conf filename:"
+               + jhConfFileName,
+               JobHistoryUtils.isJobConfXml(jhConfFileName.getName()));
+    JobID extractedJID =
+      JobID.forName(JobHistoryUtils.extractJobID(jhConfFileName.getName()));
+    assertEquals("TraceBuilder failed to parse the current JH conf filename:"
+                 + jhConfFileName, jid, extractedJID);
+    // Test jobhistory conf filename with old/stale file suffix
+    jhConfFileName = jhConfFileName.suffix(JobHistory.getOldFileSuffix("123"));
+    assertTrue("TraceBuilder failed to parse the current JH conf filename"
+               + " (old suffix):" + jhConfFileName,
+               JobHistoryUtils.isJobConfXml(jhConfFileName.getName()));
+    extractedJID =
+      JobID.forName(JobHistoryUtils.extractJobID(jhConfFileName.getName()));
+    assertEquals("TraceBuilder failed to parse the JH conf filename"
+                 + "(old-suffix):" + jhConfFileName,
+                 jid, extractedJID);
+  }
+
+  /**
+   * Tests if {@link TraceBuilder} can correctly identify and parse different
+   * versions of jobhistory filenames. The testcase checks if
+   * {@link TraceBuilder}
    *   - correctly identifies a jobhistory filename without suffix
    *   - correctly parses a jobhistory filename without suffix to extract out 
    *     the jobid
@@ -261,36 +310,36 @@ public class TestRumenJobTraces {
   public void testJobHistoryFilenameParsing() throws IOException {
     final Configuration conf = new Configuration();
     final FileSystem lfs = FileSystem.getLocal(conf);
-    String user = "test";
+    String user = "testUser";
     org.apache.hadoop.mapred.JobID jid = 
       new org.apache.hadoop.mapred.JobID("12345", 1);
     final Path rootInputDir =
       new Path(System.getProperty("test.tools.input.dir", ""))
             .makeQualified(lfs.getUri(), lfs.getWorkingDirectory());
     
-    // Check if jobhistory filename are detected properly
+    // Check if current jobhistory filenames are detected properly
     Path jhFilename = JobHistory.getJobHistoryFile(rootInputDir, jid, user);
-    JobID extractedJID = 
-      JobID.forName(TraceBuilder.extractJobID(jhFilename.getName()));
-    assertEquals("TraceBuilder failed to parse the current JH filename", 
-                 jid, extractedJID);
-    // test jobhistory filename with old/stale file suffix
-    jhFilename = jhFilename.suffix(JobHistory.getOldFileSuffix("123"));
-    extractedJID =
-      JobID.forName(TraceBuilder.extractJobID(jhFilename.getName()));
-    assertEquals("TraceBuilder failed to parse the current JH filename"
-                 + "(old-suffix)", 
-                 jid, extractedJID);
-    
-    // Check if the conf filename in jobhistory are detected properly
+    validateHistoryFileNameParsing(jhFilename, jid);
+
+    // Check if Pre21 V1 jobhistory file names are detected properly
+    jhFilename = new Path("jt-identifier_" + jid + "_user-name_job-name");
+    validateHistoryFileNameParsing(jhFilename, jid);
+
+    // Check if Pre21 V2 jobhistory file names are detected properly
+    jhFilename = new Path(jid + "_user-name_job-name");
+    validateHistoryFileNameParsing(jhFilename, jid);
+
+    // Check if the current jobhistory conf filenames are detected properly
     Path jhConfFilename = JobHistory.getConfFile(rootInputDir, jid);
-    assertTrue("TraceBuilder failed to parse the current JH conf filename", 
-               TraceBuilder.isJobConfXml(jhConfFilename.getName(), null));
-    // test jobhistory conf filename with old/stale file suffix
-    jhConfFilename = jhConfFilename.suffix(JobHistory.getOldFileSuffix("123"));
-    assertTrue("TraceBuilder failed to parse the current JH conf filename" 
-               + " (old suffix)", 
-               TraceBuilder.isJobConfXml(jhConfFilename.getName(), null));
+    validateJHConfFileNameParsing(jhConfFilename, jid);
+
+    // Check if Pre21 V1 jobhistory conf file names are detected properly
+    jhConfFilename = new Path("jt-identifier_" + jid + "_conf.xml");
+    validateJHConfFileNameParsing(jhConfFilename, jid);
+
+    // Check if Pre21 V2 jobhistory conf file names are detected properly
+    jhConfFilename = new Path(jid + "_conf.xml");
+    validateJHConfFileNameParsing(jhConfFilename, jid);
   }
 
   /**

Added: hadoop/common/trunk/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/JobHistoryUtils.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/JobHistoryUtils.java?rev=1182293&view=auto
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/JobHistoryUtils.java (added)
+++ hadoop/common/trunk/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/JobHistoryUtils.java Wed Oct 12 09:58:14 2011
@@ -0,0 +1,115 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.tools.rumen;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.hadoop.mapreduce.JobID;
+import org.apache.hadoop.mapreduce.jobhistory.JobHistory;
+
+/**
+ * Job History related utils for handling multiple formats of history logs of
+ * different hadoop versions like Pre21 history logs, current history logs.
+ */
+public class JobHistoryUtils {
+
+  private static String applyParser(String fileName, Pattern pattern) {
+    Matcher matcher = pattern.matcher(fileName);
+
+    if (!matcher.matches()) {
+      return null;
+    }
+
+    return matcher.group(1);
+  }
+
+  /**
+   * Extracts jobID string from the given job history log file name or
+   * job history configuration file name.
+   * @param fileName name of job history file or job history configuration file
+   * @return a valid jobID String, parsed out of the file name. Otherwise,
+   *         [especially for .crc files] returns null.
+   */
+  static String extractJobID(String fileName) {
+    // Get jobID if fileName is a config file name.
+    String jobId = extractJobIDFromConfFileName(fileName);
+    if (jobId == null) {
+      // Get JobID if fileName is a job history file name
+      jobId = extractJobIDFromHistoryFileName(fileName);
+    }
+    return jobId;
+  }
+
+  /**
+   * Extracts jobID string from the given job history file name.
+   * @param fileName name of the job history file
+   * @return JobID if the given <code>fileName</code> is a valid job history
+   *         file name, <code>null</code> otherwise.
+   */
+  private static String extractJobIDFromHistoryFileName(String fileName) {
+    // History file name could be in one of the following formats
+    // (1) old pre21 (V1) format: jt-identifier_job-id_user-name_job-name
+    // (2) new pre21 (V2) format: job-id_user-name_job-name
+    // (3) current job history file name format i.e. 0.22
+    String pre21JobID = applyParser(fileName,
+        Pre21JobHistoryConstants.JOBHISTORY_FILENAME_REGEX_V1);
+    if (pre21JobID == null) {
+      pre21JobID = applyParser(fileName,
+          Pre21JobHistoryConstants.JOBHISTORY_FILENAME_REGEX_V2);
+    }
+    if (pre21JobID != null) {
+      return pre21JobID;
+    }
+    return applyParser(fileName, JobHistory.JOBHISTORY_FILENAME_REGEX);
+  }
+
+  /**
+   * Extracts jobID string from the given job conf xml file name.
+   * @param fileName name of the job conf xml file
+   * @return job id if the given <code>fileName</code> is a valid job conf xml
+   *         file name, <code>null</code> otherwise.
+   */
+  private static String extractJobIDFromConfFileName(String fileName) {
+    // History conf file name could be in one of the following formats
+    // (1) old pre21 (V1) conf format: jt-identifier_job-id_conf.xml
+    // (2) new pre21 (V2) conf format: job-id_conf.xml
+    // (3) current job history conf file name format i.e. 0.22
+    String pre21JobID = applyParser(fileName,
+                          Pre21JobHistoryConstants.CONF_FILENAME_REGEX_V1);
+    if (pre21JobID == null) {
+      pre21JobID = applyParser(fileName,
+                     Pre21JobHistoryConstants.CONF_FILENAME_REGEX_V2);
+    }
+    if (pre21JobID != null) {
+      return pre21JobID;
+    }
+    return applyParser(fileName, JobHistory.CONF_FILENAME_REGEX);
+  }
+
+  /**
+   * Checks if the given <code>fileName</code> is a valid job conf xml file name
+   * @param fileName name of the file to be validated
+   * @return <code>true</code> if the given <code>fileName</code> is a valid
+   *         job conf xml file name.
+   */
+  static boolean isJobConfXml(String fileName) {
+    String jobId = extractJobIDFromConfFileName(fileName);
+    return jobId != null;
+  }
+}

Modified: hadoop/common/trunk/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/Pre21JobHistoryConstants.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/Pre21JobHistoryConstants.java?rev=1182293&r1=1182292&r2=1182293&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/Pre21JobHistoryConstants.java (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/Pre21JobHistoryConstants.java Wed Oct 12 09:58:14 2011
@@ -20,10 +20,10 @@ package org.apache.hadoop.tools.rumen;
 import java.util.regex.Pattern;
 
 import org.apache.hadoop.mapreduce.JobID;
+import org.apache.hadoop.mapreduce.jobhistory.JobHistory;
 
 /**
- * 
- *
+ * Job History related constants for Hadoop releases prior to 0.21
  */
 public class Pre21JobHistoryConstants {
   
@@ -51,18 +51,34 @@ public class Pre21JobHistoryConstants {
   }
   
   /**
-   * Pre21 regex for jobhistory filename 
+   * Regex for Pre21 V1(old) jobhistory filename
    *   i.e jt-identifier_job-id_user-name_job-name
    */
-  static final Pattern JOBHISTORY_FILENAME_REGEX =
+  static final Pattern JOBHISTORY_FILENAME_REGEX_V1 =
     Pattern.compile("[^.].+_(" + JobID.JOBID_REGEX + ")_.+");
+  /**
+   * Regex for Pre21 V2(new) jobhistory filename
+   *   i.e job-id_user-name_job-name
+   */
+  static final Pattern JOBHISTORY_FILENAME_REGEX_V2 =
+    Pattern.compile("(" + JobID.JOBID_REGEX + ")_.+");
+
+  static final String OLD_FULL_SUFFIX_REGEX_STRING =
+    "(?:\\.[0-9]+" + Pattern.quote(JobHistory.OLD_SUFFIX) + ")";
 
   /**
-   * Pre21 regex for jobhistory conf filename 
+   * Regex for Pre21 V1(old) jobhistory conf filename 
    *   i.e jt-identifier_job-id_conf.xml
    */
-  static final Pattern CONF_FILENAME_REGEX =
-    Pattern.compile("[^.].+_(" + JobID.JOBID_REGEX 
-                    + ")_conf.xml(?:\\.[0-9a-zA-Z]+)?");
+  static final Pattern CONF_FILENAME_REGEX_V1 =
+    Pattern.compile("[^.].+_(" + JobID.JOBID_REGEX + ")_conf.xml"
+                    + OLD_FULL_SUFFIX_REGEX_STRING + "?");
+  /**
+   * Regex for Pre21 V2(new) jobhistory conf filename
+   *   i.e job-id_conf.xml
+   */
+  static final Pattern CONF_FILENAME_REGEX_V2 =
+    Pattern.compile("(" + JobID.JOBID_REGEX + ")_conf.xml"
+                    + OLD_FULL_SUFFIX_REGEX_STRING + "?");
  
 }

Modified: hadoop/common/trunk/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/TraceBuilder.java
URL: http://svn.apache.org/viewvc/hadoop/common/trunk/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/TraceBuilder.java?rev=1182293&r1=1182292&r2=1182293&view=diff
==============================================================================
--- hadoop/common/trunk/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/TraceBuilder.java (original)
+++ hadoop/common/trunk/hadoop-mapreduce-project/src/tools/org/apache/hadoop/tools/rumen/TraceBuilder.java Wed Oct 12 09:58:14 2011
@@ -198,42 +198,6 @@ public class TraceBuilder extends Config
     }
   }
 
-  private static String applyParser(String fileName, Pattern pattern) {
-    Matcher matcher = pattern.matcher(fileName);
-
-    if (!matcher.matches()) {
-      return null;
-    }
-
-    return matcher.group(1);
-  }
-
-  /**
-   * @param fileName
-   * @return the jobID String, parsed out of the file name. We return a valid
-   *         String for either a history log file or a config file. Otherwise,
-   *         [especially for .crc files] we return null.
-   */
-  static String extractJobID(String fileName) {
-    String jobId = applyParser(fileName, JobHistory.JOBHISTORY_FILENAME_REGEX);
-    if (jobId == null) {
-      // check if its a pre21 jobhistory file
-      jobId = applyParser(fileName, 
-                          Pre21JobHistoryConstants.JOBHISTORY_FILENAME_REGEX);
-    }
-    return jobId;
-  }
-
-  static boolean isJobConfXml(String fileName, InputStream input) {
-    String jobId = applyParser(fileName, JobHistory.CONF_FILENAME_REGEX);
-    if (jobId == null) {
-      // check if its a pre21 jobhistory conf file
-      jobId = applyParser(fileName, 
-                          Pre21JobHistoryConstants.CONF_FILENAME_REGEX);
-    }
-    return jobId != null;
-  }
-
 
   @SuppressWarnings("unchecked")
   @Override
@@ -268,7 +232,7 @@ public class TraceBuilder extends Config
             JobHistoryParser parser = null;
 
             try {
-              String jobID = extractJobID(filePair.first());
+              String jobID = JobHistoryUtils.extractJobID(filePair.first());
               if (jobID == null) {
                 LOG.warn("File skipped: Invalid file name: "
                     + filePair.first());
@@ -282,8 +246,9 @@ public class TraceBuilder extends Config
                 jobBuilder = new JobBuilder(jobID);
               }
 
-              if (isJobConfXml(filePair.first(), ris)) {
-            	processJobConf(JobConfigurationParser.parse(ris.rewind()), jobBuilder);
+              if (JobHistoryUtils.isJobConfXml(filePair.first())) {
+                processJobConf(JobConfigurationParser.parse(ris.rewind()),
+                               jobBuilder);
               } else {
                 parser = JobHistoryParserFactory.getParser(ris);
                 if (parser == null) {