You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ma...@apache.org on 2010/07/03 19:59:30 UTC

svn commit: r960248 - in /nutch/branches/branch-1.2: ./ conf/ src/java/org/apache/nutch/crawl/ src/java/org/apache/nutch/fetcher/ src/java/org/apache/nutch/indexer/ src/java/org/apache/nutch/indexer/field/ src/java/org/apache/nutch/indexer/solr/ src/ja...

Author: mattmann
Date: Sat Jul  3 17:59:29 2010
New Revision: 960248

URL: http://svn.apache.org/viewvc?rev=960248&view=rev
Log:
Fix for NUTCH-838: add timing information to all Tool classes (backported to the 1.2 branch)

Added:
    nutch/branches/branch-1.2/src/java/org/apache/nutch/util/TimingUtil.java   (with props)
Modified:
    nutch/branches/branch-1.2/CHANGES.txt
    nutch/branches/branch-1.2/conf/log4j.properties
    nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/CrawlDb.java
    nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/CrawlDbMerger.java
    nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/Generator.java
    nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/Injector.java
    nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/LinkDb.java
    nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/LinkDbMerger.java
    nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/LinkDbReader.java
    nutch/branches/branch-1.2/src/java/org/apache/nutch/fetcher/Fetcher.java
    nutch/branches/branch-1.2/src/java/org/apache/nutch/fetcher/OldFetcher.java
    nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/DeleteDuplicates.java
    nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/IndexMerger.java
    nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/IndexSorter.java
    nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/Indexer.java
    nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/field/AnchorFields.java
    nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/field/BasicFields.java
    nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/field/CustomFields.java
    nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/field/FieldIndexer.java
    nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java
    nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/solr/SolrIndexer.java
    nutch/branches/branch-1.2/src/java/org/apache/nutch/parse/ParseSegment.java
    nutch/branches/branch-1.2/src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java
    nutch/branches/branch-1.2/src/java/org/apache/nutch/scoring/webgraph/LinkRank.java
    nutch/branches/branch-1.2/src/java/org/apache/nutch/scoring/webgraph/Loops.java
    nutch/branches/branch-1.2/src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java
    nutch/branches/branch-1.2/src/java/org/apache/nutch/scoring/webgraph/ScoreUpdater.java
    nutch/branches/branch-1.2/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java
    nutch/branches/branch-1.2/src/java/org/apache/nutch/tools/CrawlDBScanner.java
    nutch/branches/branch-1.2/src/java/org/apache/nutch/tools/FreeGenerator.java
    nutch/branches/branch-1.2/src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java
    nutch/branches/branch-1.2/src/java/org/apache/nutch/tools/compat/CrawlDbConverter.java
    nutch/branches/branch-1.2/src/java/org/apache/nutch/tools/compat/ReprUrlFixer.java
    nutch/branches/branch-1.2/src/java/org/apache/nutch/util/domain/DomainStatistics.java

Modified: nutch/branches/branch-1.2/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/CHANGES.txt?rev=960248&r1=960247&r2=960248&view=diff
==============================================================================
--- nutch/branches/branch-1.2/CHANGES.txt (original)
+++ nutch/branches/branch-1.2/CHANGES.txt Sat Jul  3 17:59:29 2010
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release 1.2 - Current Development
 
+* NUTCH-838 Add timing information to all Tool classes (Jeroen van Vianen, mattmann)
+
 * NUTCH-835 Document deduplication failed using MD5Signature (Sebastian Nagel via ab)
 
 * NUTCH-831 Allow configuration of how fields crawled by Nutch are stored / indexed / 

Modified: nutch/branches/branch-1.2/conf/log4j.properties
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/conf/log4j.properties?rev=960248&r1=960247&r2=960248&view=diff
==============================================================================
--- nutch/branches/branch-1.2/conf/log4j.properties (original)
+++ nutch/branches/branch-1.2/conf/log4j.properties Sat Jul  3 17:59:29 2010
@@ -25,6 +25,9 @@ log4j.logger.org.apache.nutch.crawl.Link
 log4j.logger.org.apache.nutch.indexer.Indexer=INFO,cmdstdout
 log4j.logger.org.apache.nutch.indexer.DeleteDuplicates=INFO,cmdstdout
 log4j.logger.org.apache.nutch.indexer.IndexMerger=INFO,cmdstdout
+log4j.logger.org.apache.nutch.indexer.solr.SolrIndexer=INFO,cmdstdout
+log4j.logger.org.apache.nutch.indexer.solr.SolrDeleteDuplicates=INFO,cmdstdout
+
 
 log4j.logger.org.apache.nutch=INFO
 log4j.logger.org.apache.hadoop=WARN

Modified: nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/CrawlDb.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/CrawlDb.java?rev=960248&r1=960247&r2=960248&view=diff
==============================================================================
--- nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/CrawlDb.java (original)
+++ nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/CrawlDb.java Sat Jul  3 17:59:29 2010
@@ -18,6 +18,7 @@
 package org.apache.nutch.crawl;
 
 import java.io.*;
+import java.text.SimpleDateFormat;
 import java.util.*;
 
 // Commons Logging imports
@@ -34,6 +35,7 @@ import org.apache.nutch.util.HadoopFSUti
 import org.apache.nutch.util.LockUtil;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
+import org.apache.nutch.util.TimingUtil;
 
 /**
  * This class takes the output of the fetcher and updates the
@@ -63,8 +65,10 @@ public class CrawlDb extends Configured 
     FileSystem fs = FileSystem.get(getConf());
     Path lock = new Path(crawlDb, LOCK_NAME);
     LockUtil.createLockFile(fs, lock, force);
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    long start = System.currentTimeMillis();
     if (LOG.isInfoEnabled()) {
-      LOG.info("CrawlDb update: starting");
+      LOG.info("CrawlDb update: starting at " + sdf.format(start));
       LOG.info("CrawlDb update: db: " + crawlDb);
       LOG.info("CrawlDb update: segments: " + Arrays.asList(segments));
       LOG.info("CrawlDb update: additions allowed: " + additionsAllowed);
@@ -100,7 +104,8 @@ public class CrawlDb extends Configured 
     }
 
     CrawlDb.install(job, crawlDb);
-    if (LOG.isInfoEnabled()) { LOG.info("CrawlDb update: done"); }
+    long end = System.currentTimeMillis();
+    LOG.info("CrawlDb update: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
   }
 
   public static JobConf createJob(Configuration config, Path crawlDb)

Modified: nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/CrawlDbMerger.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/CrawlDbMerger.java?rev=960248&r1=960247&r2=960248&view=diff
==============================================================================
--- nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/CrawlDbMerger.java (original)
+++ nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/CrawlDbMerger.java Sat Jul  3 17:59:29 2010
@@ -18,6 +18,7 @@
 package org.apache.nutch.crawl;
 
 import java.io.IOException;
+import java.text.SimpleDateFormat;
 import java.util.*;
 import java.util.Map.Entry;
 
@@ -35,6 +36,7 @@ import org.apache.hadoop.util.*;
 import org.apache.hadoop.conf.*;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
+import org.apache.nutch.util.TimingUtil;
 
 /**
  * This tool merges several CrawlDb-s into one, optionally filtering
@@ -112,6 +114,10 @@ public class CrawlDbMerger extends Confi
   }
 
   public void merge(Path output, Path[] dbs, boolean normalize, boolean filter) throws Exception {
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    long start = System.currentTimeMillis();
+    LOG.info("CrawlDb merge: starting at " + sdf.format(start));
+
     JobConf job = createMergeJob(getConf(), output, normalize, filter);
     for (int i = 0; i < dbs.length; i++) {
       FileInputFormat.addInputPath(job, new Path(dbs[i], CrawlDb.CURRENT_NAME));
@@ -120,6 +126,8 @@ public class CrawlDbMerger extends Confi
     FileSystem fs = FileSystem.get(getConf());
     fs.mkdirs(output);
     fs.rename(FileOutputFormat.getOutputPath(job), new Path(output, CrawlDb.CURRENT_NAME));
+    long end = System.currentTimeMillis();
+    LOG.info("CrawlDb merge: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
   }
 
   public static JobConf createMergeJob(Configuration conf, Path output, boolean normalize, boolean filter) {

Modified: nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/Generator.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/Generator.java?rev=960248&r1=960247&r2=960248&view=diff
==============================================================================
--- nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/Generator.java (original)
+++ nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/Generator.java Sat Jul  3 17:59:29 2010
@@ -44,6 +44,7 @@ import org.apache.nutch.scoring.ScoringF
 import org.apache.nutch.util.LockUtil;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
+import org.apache.nutch.util.TimingUtil;
 import org.apache.nutch.util.URLUtil;
 
 /**
@@ -472,8 +473,10 @@ public class Generator extends Configure
     FileSystem fs = FileSystem.get(getConf());
     LockUtil.createLockFile(fs, lock, force);
 
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    long start = System.currentTimeMillis();
+    LOG.info("Generator: starting at " + sdf.format(start));
     LOG.info("Generator: Selecting best-scoring urls due for fetch.");
-    LOG.info("Generator: starting");
     LOG.info("Generator: filtering: " + filter);
     LOG.info("Generator: normalizing: " + norm);
     if (topN != Long.MAX_VALUE) {
@@ -586,9 +589,8 @@ public class Generator extends Configure
     LockUtil.removeLockFile(fs, lock);
     fs.delete(tempDir, true);
 
-    if (LOG.isInfoEnabled()) {
-      LOG.info("Generator: done.");
-    }
+    long end = System.currentTimeMillis();
+    LOG.info("Generator: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
 
     Path[] patharray = new Path[generatedSegments.size()];
     return generatedSegments.toArray(patharray);

Modified: nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/Injector.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/Injector.java?rev=960248&r1=960247&r2=960248&view=diff
==============================================================================
--- nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/Injector.java (original)
+++ nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/Injector.java Sat Jul  3 17:59:29 2010
@@ -18,6 +18,7 @@
 package org.apache.nutch.crawl;
 
 import java.io.*;
+import java.text.SimpleDateFormat;
 import java.util.*;
 
 // Commons Logging imports
@@ -35,6 +36,7 @@ import org.apache.nutch.scoring.ScoringF
 import org.apache.nutch.scoring.ScoringFilters;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
+import org.apache.nutch.util.TimingUtil;
 
 /** This class takes a flat file of URLs and adds them to the of pages to be
  * crawled.  Useful for bootstrapping the system. 
@@ -79,6 +81,12 @@ public class Injector extends Configured
                     OutputCollector<Text, CrawlDatum> output, Reporter reporter)
       throws IOException {
       String url = value.toString();              // value is line of text
+
+      if (url != null && url.trim().startsWith("#")) {
+          /* Ignore lines that start with # */
+          return;
+      }
+
       // if tabs : metadata that could be stored
       // must be name=value and separated by \t
       float customScore = -1f;
@@ -182,9 +190,10 @@ public class Injector extends Configured
   }
   
   public void inject(Path crawlDb, Path urlDir) throws IOException {
-
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    long start = System.currentTimeMillis();
     if (LOG.isInfoEnabled()) {
-      LOG.info("Injector: starting");
+      LOG.info("Injector: starting at " + sdf.format(start));
       LOG.info("Injector: crawlDb: " + crawlDb);
       LOG.info("Injector: urlDir: " + urlDir);
     }
@@ -223,8 +232,9 @@ public class Injector extends Configured
     // clean up
     FileSystem fs = FileSystem.get(getConf());
     fs.delete(tempDir, true);
-    if (LOG.isInfoEnabled()) { LOG.info("Injector: done"); }
 
+    long end = System.currentTimeMillis();
+    LOG.info("Injector: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
   }
 
   public static void main(String[] args) throws Exception {

Modified: nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/LinkDb.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/LinkDb.java?rev=960248&r1=960247&r2=960248&view=diff
==============================================================================
--- nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/LinkDb.java (original)
+++ nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/LinkDb.java Sat Jul  3 17:59:29 2010
@@ -18,6 +18,7 @@
 package org.apache.nutch.crawl;
 
 import java.io.*;
+import java.text.SimpleDateFormat;
 import java.util.*;
 import java.net.*;
 
@@ -39,6 +40,7 @@ import org.apache.nutch.util.HadoopFSUti
 import org.apache.nutch.util.LockUtil;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
+import org.apache.nutch.util.TimingUtil;
 
 /** Maintains an inverted link map, listing incoming links for each url. */
 public class LinkDb extends Configured implements Tool, Mapper<Text, ParseData, Text, Inlinks> {
@@ -153,8 +155,11 @@ public class LinkDb extends Configured i
     FileSystem fs = FileSystem.get(getConf());
     LockUtil.createLockFile(fs, lock, force);
     Path currentLinkDb = new Path(linkDb, CURRENT_NAME);
+
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    long start = System.currentTimeMillis();
     if (LOG.isInfoEnabled()) {
-      LOG.info("LinkDb: starting");
+      LOG.info("LinkDb: starting at " + sdf.format(start));
       LOG.info("LinkDb: linkdb: " + linkDb);
       LOG.info("LinkDb: URL normalize: " + normalize);
       LOG.info("LinkDb: URL filter: " + filter);
@@ -191,7 +196,9 @@ public class LinkDb extends Configured i
       fs.delete(newLinkDb, true);
     }
     LinkDb.install(job, linkDb);
-    if (LOG.isInfoEnabled()) { LOG.info("LinkDb: done"); }
+
+    long end = System.currentTimeMillis();
+    LOG.info("LinkDb: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
   }
 
   private static JobConf createJob(Configuration config, Path linkDb, boolean normalize, boolean filter) {

Modified: nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/LinkDbMerger.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/LinkDbMerger.java?rev=960248&r1=960247&r2=960248&view=diff
==============================================================================
--- nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/LinkDbMerger.java (original)
+++ nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/LinkDbMerger.java Sat Jul  3 17:59:29 2010
@@ -17,6 +17,7 @@
 package org.apache.nutch.crawl;
 
 import java.io.IOException;
+import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.Random;
@@ -42,6 +43,7 @@ import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
+import org.apache.nutch.util.TimingUtil;
 
 /**
  * This tool merges several LinkDb-s into one, optionally filtering
@@ -100,6 +102,10 @@ public class LinkDbMerger extends Config
   public void close() throws IOException { }
 
   public void merge(Path output, Path[] dbs, boolean normalize, boolean filter) throws Exception {
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    long start = System.currentTimeMillis();
+    LOG.info("LinkDb merge: starting at " + sdf.format(start));
+
     JobConf job = createMergeJob(getConf(), output, normalize, filter);
     for (int i = 0; i < dbs.length; i++) {
       FileInputFormat.addInputPath(job, new Path(dbs[i], LinkDb.CURRENT_NAME));      
@@ -108,6 +114,9 @@ public class LinkDbMerger extends Config
     FileSystem fs = FileSystem.get(getConf());
     fs.mkdirs(output);
     fs.rename(FileOutputFormat.getOutputPath(job), new Path(output, LinkDb.CURRENT_NAME));
+
+    long end = System.currentTimeMillis();
+    LOG.info("LinkDb merge: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
   }
 
   public static JobConf createMergeJob(Configuration config, Path linkDb, boolean normalize, boolean filter) {

Modified: nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/LinkDbReader.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/LinkDbReader.java?rev=960248&r1=960247&r2=960248&view=diff
==============================================================================
--- nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/LinkDbReader.java (original)
+++ nutch/branches/branch-1.2/src/java/org/apache/nutch/crawl/LinkDbReader.java Sat Jul  3 17:59:29 2010
@@ -33,7 +33,9 @@ import org.apache.hadoop.conf.Configurat
 
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
+import org.apache.nutch.util.TimingUtil;
 
+import java.text.SimpleDateFormat;
 import java.util.Iterator;
 
 /** . */
@@ -89,10 +91,11 @@ public class LinkDbReader extends Config
   }
   
   public void processDumpJob(String linkdb, String output) throws IOException {
-
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    long start = System.currentTimeMillis();
     if (LOG.isInfoEnabled()) {
-      LOG.info("LinkDb dump: starting");
-      LOG.info("LinkDb db: " + linkdb);
+      LOG.info("LinkDb dump: starting at " + sdf.format(start));
+      LOG.info("LinkDb dump: db: " + linkdb);
     }
     Path outFolder = new Path(output);
 
@@ -108,6 +111,9 @@ public class LinkDbReader extends Config
     job.setOutputValueClass(Inlinks.class);
 
     JobClient.runJob(job);
+
+    long end = System.currentTimeMillis();
+    LOG.info("LinkDb dump: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
   }
   
   public static void main(String[] args) throws Exception {

Modified: nutch/branches/branch-1.2/src/java/org/apache/nutch/fetcher/Fetcher.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/src/java/org/apache/nutch/fetcher/Fetcher.java?rev=960248&r1=960247&r2=960248&view=diff
==============================================================================
--- nutch/branches/branch-1.2/src/java/org/apache/nutch/fetcher/Fetcher.java (original)
+++ nutch/branches/branch-1.2/src/java/org/apache/nutch/fetcher/Fetcher.java Sat Jul  3 17:59:29 2010
@@ -21,6 +21,7 @@ import java.net.InetAddress;
 import java.net.MalformedURLException;
 import java.net.URL;
 import java.net.UnknownHostException;
+import java.text.SimpleDateFormat;
 import java.util.*;
 import java.util.Map.Entry;
 import java.util.concurrent.atomic.AtomicInteger;
@@ -1066,8 +1067,10 @@ public class Fetcher extends Configured 
 
     checkConfiguration();
 
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    long start = System.currentTimeMillis();
     if (LOG.isInfoEnabled()) {
-      LOG.info("Fetcher: starting");
+      LOG.info("Fetcher: starting at " + sdf.format(start));
       LOG.info("Fetcher: segment: " + segment);
     }
 
@@ -1102,7 +1105,9 @@ public class Fetcher extends Configured 
     job.setOutputValueClass(NutchWritable.class);
 
     JobClient.runJob(job);
-    if (LOG.isInfoEnabled()) { LOG.info("Fetcher: done"); }
+
+    long end = System.currentTimeMillis();
+    LOG.info("Fetcher: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
   }
 
 

Modified: nutch/branches/branch-1.2/src/java/org/apache/nutch/fetcher/OldFetcher.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/src/java/org/apache/nutch/fetcher/OldFetcher.java?rev=960248&r1=960247&r2=960248&view=diff
==============================================================================
--- nutch/branches/branch-1.2/src/java/org/apache/nutch/fetcher/OldFetcher.java (original)
+++ nutch/branches/branch-1.2/src/java/org/apache/nutch/fetcher/OldFetcher.java Sat Jul  3 17:59:29 2010
@@ -19,6 +19,7 @@ package org.apache.nutch.fetcher;
 
 import java.io.IOException;
 import java.net.MalformedURLException;
+import java.text.SimpleDateFormat;
 import java.util.Map.Entry;
 
 // Commons Logging imports
@@ -504,8 +505,10 @@ public class OldFetcher extends Configur
   public void fetch(Path segment, int threads)
     throws IOException {
 
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    long start = System.currentTimeMillis();
     if (LOG.isInfoEnabled()) {
-      LOG.info("OldFetcher: starting");
+      LOG.info("OldFetcher: starting at " + sdf.format(start));
       LOG.info("OldFetcher: segment: " + segment);
     }
 
@@ -529,7 +532,8 @@ public class OldFetcher extends Configur
     job.setOutputValueClass(NutchWritable.class);
 
     JobClient.runJob(job);
-    if (LOG.isInfoEnabled()) { LOG.info("OldFetcher: done"); }
+    long end = System.currentTimeMillis();
+    LOG.info("OldFetcher: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
   }
 
 

Modified: nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/DeleteDuplicates.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/DeleteDuplicates.java?rev=960248&r1=960247&r2=960248&view=diff
==============================================================================
--- nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/DeleteDuplicates.java (original)
+++ nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/DeleteDuplicates.java Sat Jul  3 17:59:29 2010
@@ -36,6 +36,7 @@ import org.apache.nutch.util.NutchJob;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.document.DateTools;
 import org.apache.lucene.document.Document;
+import org.apache.nutch.util.TimingUtil;
 
 /**
  * Delete duplicate documents in a set of Lucene indexes.
@@ -418,7 +419,9 @@ public class DeleteDuplicates extends Co
   public void dedup(Path[] indexDirs)
     throws IOException {
 
-    if (LOG.isInfoEnabled()) { LOG.info("Dedup: starting"); }
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    long start = System.currentTimeMillis();
+    LOG.info("Dedup: starting at " + sdf.format(start));
 
     Path outDir1 =
       new Path("dedup-urls-"+
@@ -492,7 +495,8 @@ public class DeleteDuplicates extends Co
 
     fs.delete(outDir2, true);
 
-    if (LOG.isInfoEnabled()) { LOG.info("Dedup: done"); }
+    long end = System.currentTimeMillis();
+    LOG.info("Dedup: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
   }
 
   public static void main(String[] args) throws Exception {

Modified: nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/IndexMerger.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/IndexMerger.java?rev=960248&r1=960247&r2=960248&view=diff
==============================================================================
--- nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/IndexMerger.java (original)
+++ nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/IndexMerger.java Sat Jul  3 17:59:29 2010
@@ -18,6 +18,7 @@
 package org.apache.nutch.indexer;
 
 import java.io.*;
+import java.text.SimpleDateFormat;
 import java.util.*;
 
 import org.apache.commons.logging.Log;
@@ -37,6 +38,7 @@ import org.apache.lucene.store.FSDirecto
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.LogMergePolicy;
 import org.apache.lucene.index.IndexWriter.MaxFieldLength;
+import org.apache.nutch.util.TimingUtil;
 
 /*************************************************************************
  * IndexMerger creates an index for the output corresponding to a 
@@ -62,7 +64,12 @@ public class IndexMerger extends Configu
    * Merge all input indexes to the single output index
    */
   public void merge(Path[] indexes, Path outputIndex, Path localWorkingDir) throws IOException {
-    LOG.info("merging indexes to: " + outputIndex);
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    long start = System.currentTimeMillis();
+    if (LOG.isInfoEnabled()) {
+      LOG.info("IndexMerger: starting at " + sdf.format(start));
+      LOG.info("IndexMerger: merging indexes to: " + outputIndex);
+    }
 
     FileSystem localFs = FileSystem.getLocal(getConf());  
     if (localFs.exists(localWorkingDir)) {
@@ -107,7 +114,8 @@ public class IndexMerger extends Configu
     // Put target back
     //
     fs.completeLocalOutput(outputIndex, tmpLocalOutput);
-    LOG.info("done merging");
+    long end = System.currentTimeMillis();
+    LOG.info("IndexMerger: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
   }
 
   /** 

Modified: nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/IndexSorter.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/IndexSorter.java?rev=960248&r1=960247&r2=960248&view=diff
==============================================================================
--- nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/IndexSorter.java (original)
+++ nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/IndexSorter.java Sat Jul  3 17:59:29 2010
@@ -19,6 +19,7 @@ package org.apache.nutch.indexer;
 
 import java.io.File;
 import java.io.IOException;
+import java.text.SimpleDateFormat;
 import java.util.Date;
 import java.util.Arrays;
 
@@ -35,6 +36,7 @@ import org.apache.commons.logging.LogFac
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.conf.Configured;
 import org.apache.hadoop.util.*;
+import org.apache.nutch.util.TimingUtil;
 
 /** Sort a Nutch index by page score.  Higher scoring documents are assigned
  * smaller document numbers. */
@@ -261,8 +263,9 @@ public class IndexSorter extends Configu
   }
   
   public void sort(File directory) throws IOException {
-    LOG.info("IndexSorter: starting.");
-    Date start = new Date();
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    long start = System.currentTimeMillis();
+    LOG.info("IndexSorter: starting at " + sdf.format(start));
     int termIndexInterval = getConf().getInt("indexer.termIndexInterval", 128);
     IndexReader reader = IndexReader.open(
     		FSDirectory.open(new File(directory, "index")));
@@ -276,9 +279,8 @@ public class IndexSorter extends Configu
     writer.setUseCompoundFile(false);
     writer.addIndexes(new IndexReader[] { sorter });
     writer.close();
-    Date end = new Date();
-    LOG.info("IndexSorter: done, " + (end.getTime() - start.getTime())
-        + " total milliseconds");
+    long end = System.currentTimeMillis();
+    LOG.info("IndexSorter: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
   }
 
   private static int[] oldToNew(IndexReader reader) throws IOException {

Modified: nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/Indexer.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/Indexer.java?rev=960248&r1=960247&r2=960248&view=diff
==============================================================================
--- nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/Indexer.java (original)
+++ nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/Indexer.java Sat Jul  3 17:59:29 2010
@@ -18,6 +18,7 @@
 package org.apache.nutch.indexer;
 
 import java.io.IOException;
+import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.List;
 
@@ -35,6 +36,7 @@ import org.apache.hadoop.util.ToolRunner
 import org.apache.nutch.indexer.lucene.LuceneWriter;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
+import org.apache.nutch.util.TimingUtil;
 
 /** Create indexes for segments. */
 public class Indexer extends Configured implements Tool {
@@ -54,7 +56,9 @@ public class Indexer extends Configured 
   public void index(Path luceneDir, Path crawlDb,
                     Path linkDb, List<Path> segments)
   throws IOException {
-    LOG.info("Indexer: starting");
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    long start = System.currentTimeMillis();
+    LOG.info("Indexer: starting at " + sdf.format(start));
 
     final JobConf job = new NutchJob(getConf());
     job.setJobName("index-lucene " + luceneDir);
@@ -70,7 +74,8 @@ public class Indexer extends Configured 
     NutchIndexWriterFactory.addClassToConf(job, LuceneWriter.class);
 
     JobClient.runJob(job);
-    LOG.info("Indexer: done");
+    long end = System.currentTimeMillis();
+    LOG.info("Indexer: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
   }
 
   public int run(String[] args) throws Exception {

Modified: nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/field/AnchorFields.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/field/AnchorFields.java?rev=960248&r1=960247&r2=960248&view=diff
==============================================================================
--- nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/field/AnchorFields.java (original)
+++ nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/field/AnchorFields.java Sat Jul  3 17:59:29 2010
@@ -17,6 +17,7 @@
 package org.apache.nutch.indexer.field;
 
 import java.io.IOException;
+import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Comparator;
@@ -58,6 +59,7 @@ import org.apache.nutch.scoring.webgraph
 import org.apache.nutch.scoring.webgraph.WebGraph;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
+import org.apache.nutch.util.TimingUtil;
 
 /**
  * Creates FieldWritable objects for inbound anchor text.   These FieldWritable
@@ -357,6 +359,9 @@ public class AnchorFields
    */
   public void createFields(Path webGraphDb, Path basicFields, Path output)
     throws IOException {
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    long start = System.currentTimeMillis();
+    LOG.info("AnchorFields: starting at " + sdf.format(start));
 
     Configuration conf = getConf();
     FileSystem fs = FileSystem.get(conf);
@@ -365,6 +370,8 @@ public class AnchorFields
     runExtractor(webGraphDb, tempLinks);
     runCollector(basicFields, tempLinks, output);
     fs.delete(tempLinks, true);
+    long end = System.currentTimeMillis();
+    LOG.info("AnchorFields: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
   }
 
   public static void main(String[] args)

Modified: nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/field/BasicFields.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/field/BasicFields.java?rev=960248&r1=960247&r2=960248&view=diff
==============================================================================
--- nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/field/BasicFields.java (original)
+++ nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/field/BasicFields.java Sat Jul  3 17:59:29 2010
@@ -17,6 +17,7 @@
 package org.apache.nutch.indexer.field;
 
 import java.io.IOException;
+import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
@@ -65,6 +66,7 @@ import org.apache.nutch.scoring.webgraph
 import org.apache.nutch.scoring.webgraph.WebGraph;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
+import org.apache.nutch.util.TimingUtil;
 import org.apache.nutch.util.URLUtil;
 
 /**
@@ -691,6 +693,10 @@ public class BasicFields
   public void createFields(Path nodeDb, Path[] segments, Path output)
     throws IOException {
 
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    long start = System.currentTimeMillis();
+    LOG.info("BasicFields: starting at " + sdf.format(start));
+
     Configuration conf = getConf();
     FileSystem fs = FileSystem.get(conf);
     Path tempOutput = new Path(output.toString() + "-temp");
@@ -720,6 +726,8 @@ public class BasicFields
     // merge all of the segments and delete any temporary output
     runMerger(basicFields, output);
     fs.delete(tempOutput, true);
+    long end = System.currentTimeMillis();
+    LOG.info("BasicFields: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
   }
 
   public static void main(String[] args)

Modified: nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/field/CustomFields.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/field/CustomFields.java?rev=960248&r1=960247&r2=960248&view=diff
==============================================================================
--- nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/field/CustomFields.java (original)
+++ nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/field/CustomFields.java Sat Jul  3 17:59:29 2010
@@ -18,6 +18,7 @@ package org.apache.nutch.indexer.field;
 
 import java.io.IOException;
 import java.io.InputStream;
+import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Enumeration;
 import java.util.HashMap;
@@ -62,6 +63,7 @@ import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
+import org.apache.nutch.util.TimingUtil;
 
 /**
  * Creates custom FieldWritable objects from a text file containing field
@@ -375,6 +377,10 @@ public class CustomFields
   void createFields(Path basicFields, Path[] inputs, Path output)
     throws IOException {
 
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    long start = System.currentTimeMillis();
+    LOG.info("CustomFields: starting at " + sdf.format(start));
+
     Configuration conf = getConf();
     FileSystem fs = FileSystem.get(conf);
     Path tempFields = new Path(output + "-"
@@ -382,6 +388,8 @@ public class CustomFields
     runConverter(inputs, tempFields);
     runCollector(basicFields, tempFields, output);
     fs.delete(tempFields, true);
+    long end = System.currentTimeMillis();
+    LOG.info("CustomFields: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
   }
 
   public static void main(String[] args)

Modified: nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/field/FieldIndexer.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/field/FieldIndexer.java?rev=960248&r1=960247&r2=960248&view=diff
==============================================================================
--- nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/field/FieldIndexer.java (original)
+++ nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/field/FieldIndexer.java Sat Jul  3 17:59:29 2010
@@ -20,6 +20,7 @@ import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.File;
 import java.io.IOException;
+import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
@@ -68,6 +69,7 @@ import org.apache.nutch.indexer.NutchSim
 import org.apache.nutch.util.LogUtil;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
+import org.apache.nutch.util.TimingUtil;
 
 public class FieldIndexer
   extends Configured
@@ -248,7 +250,9 @@ public class FieldIndexer
   public void index(Path[] fields, Path indexDir)
     throws IOException {
 
-    LOG.info("FieldIndexer: starting");
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    long start = System.currentTimeMillis();
+    LOG.info("FieldIndexer: starting at " + sdf.format(start));
 
     JobConf job = new NutchJob(getConf());
     job.setJobName("FieldIndexer: " + indexDir);
@@ -270,9 +274,8 @@ public class FieldIndexer
     job.setOutputValueClass(LuceneDocumentWrapper.class);
 
     JobClient.runJob(job);
-    if (LOG.isInfoEnabled()) {
-      LOG.info("FieldIndexer: done");
-    }
+    long end = System.currentTimeMillis();
+    LOG.info("FieldIndexer: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
   }
 
   public static void main(String[] args)

Modified: nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java?rev=960248&r1=960247&r2=960248&view=diff
==============================================================================
--- nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java (original)
+++ nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/solr/SolrDeleteDuplicates.java Sat Jul  3 17:59:29 2010
@@ -20,6 +20,7 @@ import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
 import java.net.MalformedURLException;
+import java.text.SimpleDateFormat;
 import java.util.Iterator;
 
 import org.apache.commons.logging.Log;
@@ -42,6 +43,7 @@ import org.apache.hadoop.util.ToolRunner
 import org.apache.nutch.indexer.DeleteDuplicates;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
+import org.apache.nutch.util.TimingUtil;
 import org.apache.solr.client.solrj.SolrQuery;
 import org.apache.solr.client.solrj.SolrServer;
 import org.apache.solr.client.solrj.SolrServerException;
@@ -122,14 +124,12 @@ Tool {
       tstamp = (Long)doc.getFieldValue(SolrConstants.TIMESTAMP_FIELD);
     }
 
-    @Override
     public void readFields(DataInput in) throws IOException {
       id = Text.readString(in);
       boost = in.readFloat();
       tstamp = in.readLong();
     }
 
-    @Override
     public void write(DataOutput out) throws IOException {
       Text.writeString(out, id);
       out.writeFloat(boost);
@@ -157,23 +157,19 @@ Tool {
       return numDocs;
     }
 
-    @Override
     public long getLength() throws IOException {
       return numDocs;
     }
 
-    @Override
     public String[] getLocations() throws IOException {
       return new String[] {} ;
     }
 
-    @Override
     public void readFields(DataInput in) throws IOException {
       docBegin = in.readInt();
       numDocs = in.readInt();
     }
 
-    @Override
     public void write(DataOutput out) throws IOException {
       out.writeInt(docBegin);
       out.writeInt(numDocs);
@@ -239,30 +235,24 @@ Tool {
 
         private int currentDoc = 0;
 
-        @Override
         public void close() throws IOException { }
 
-        @Override
         public Text createKey() {
           return new Text();
         }
 
-        @Override
         public SolrRecord createValue() {
           return new SolrRecord();
         }
 
-        @Override
         public long getPos() throws IOException {
           return currentDoc;
         }
 
-        @Override
         public float getProgress() throws IOException {
           return currentDoc / (float) numDocs;
         }
 
-        @Override
         public boolean next(Text key, SolrRecord value) throws IOException {
           if (currentDoc >= numDocs) {
             return false;
@@ -288,17 +278,14 @@ Tool {
 
   private UpdateRequest updateRequest = new UpdateRequest();
 
-  @Override
   public Configuration getConf() {
     return conf;
   }
 
-  @Override
   public void setConf(Configuration conf) {
     this.conf = conf;
   }
 
-  @Override
   public void configure(JobConf job) {
     try {
       solr = new CommonsHttpSolrServer(job.get(SolrConstants.SERVER_URL));
@@ -308,10 +295,10 @@ Tool {
   }
 
 
-  @Override
   public void close() throws IOException {
     try {
       if (numDeletes > 0) {
+        LOG.info("SolrDeleteDuplicates: deleting " + numDeletes + " duplicates");
         updateRequest.process(solr);
       }
     } catch (SolrServerException e) {
@@ -319,7 +306,6 @@ Tool {
     }
   }
 
-  @Override
   public void reduce(Text key, Iterator<SolrRecord> values,
       OutputCollector<Text, SolrRecord> output, Reporter reporter)
   throws IOException {
@@ -337,6 +323,7 @@ Tool {
       numDeletes++;
       if (numDeletes >= NUM_MAX_DELETE_REQUEST) {
         try {
+          LOG.info("SolrDeleteDuplicates: deleting " + numDeletes + " duplicates");
           updateRequest.process(solr);
         } catch (SolrServerException e) {
           throw new IOException(e);
@@ -348,7 +335,9 @@ Tool {
   }
 
   public void dedup(String solrUrl) throws IOException {
-    LOG.info("SolrDeleteDuplicates: starting...");
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    long start = System.currentTimeMillis();
+    LOG.info("SolrDeleteDuplicates: starting at " + sdf.format(start));
     LOG.info("SolrDeleteDuplicates: Solr url: " + solrUrl);
     
     JobConf job = new NutchJob(getConf());
@@ -362,8 +351,9 @@ Tool {
     job.setReducerClass(SolrDeleteDuplicates.class);
 
     JobClient.runJob(job);
-    
-    LOG.info("SolrDeleteDuplicates: done.");
+
+    long end = System.currentTimeMillis();
+    LOG.info("SolrDeleteDuplicates: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
   }
 
   public int run(String[] args) throws IOException {

Modified: nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/solr/SolrIndexer.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/solr/SolrIndexer.java?rev=960248&r1=960247&r2=960248&view=diff
==============================================================================
--- nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/solr/SolrIndexer.java (original)
+++ nutch/branches/branch-1.2/src/java/org/apache/nutch/indexer/solr/SolrIndexer.java Sat Jul  3 17:59:29 2010
@@ -16,11 +16,6 @@
  */
 package org.apache.nutch.indexer.solr;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Random;
-
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -37,9 +32,16 @@ import org.apache.nutch.indexer.IndexerM
 import org.apache.nutch.indexer.NutchIndexWriterFactory;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
+import org.apache.nutch.util.TimingUtil;
 import org.apache.solr.client.solrj.SolrServer;
 import org.apache.solr.client.solrj.impl.CommonsHttpSolrServer;
 
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+
 public class SolrIndexer extends Configured implements Tool {
 
   public static Log LOG = LogFactory.getLog(SolrIndexer.class);
@@ -54,7 +56,9 @@ public class SolrIndexer extends Configu
 
   public void indexSolr(String solrUrl, Path crawlDb, Path linkDb,
       List<Path> segments) throws IOException {
-    LOG.info("SolrIndexer: starting");
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    long start = System.currentTimeMillis();
+    LOG.info("SolrIndexer: starting at " + sdf.format(start));
 
     final JobConf job = new NutchJob(getConf());
     job.setJobName("index-solr " + solrUrl);
@@ -76,13 +80,14 @@ public class SolrIndexer extends Configu
       // do the commits once and for all the reducers in one go
       SolrServer solr =  new CommonsHttpSolrServer(solrUrl);
       solr.commit();
-    } 
+      long end = System.currentTimeMillis();
+      LOG.info("SolrIndexer: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    }
     catch (Exception e){
       LOG.error(e);
     } finally {
       FileSystem.get(job).delete(tmp, true);
     }
-    LOG.info("SolrIndexer: done");
   }
 
   public int run(String[] args) throws Exception {

Modified: nutch/branches/branch-1.2/src/java/org/apache/nutch/parse/ParseSegment.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/src/java/org/apache/nutch/parse/ParseSegment.java?rev=960248&r1=960247&r2=960248&view=diff
==============================================================================
--- nutch/branches/branch-1.2/src/java/org/apache/nutch/parse/ParseSegment.java (original)
+++ nutch/branches/branch-1.2/src/java/org/apache/nutch/parse/ParseSegment.java Sat Jul  3 17:59:29 2010
@@ -34,6 +34,7 @@ import org.apache.nutch.util.*;
 import org.apache.hadoop.fs.Path;
 
 import java.io.*;
+import java.text.SimpleDateFormat;
 import java.util.*;
 import java.util.Map.Entry;
 
@@ -42,7 +43,7 @@ public class ParseSegment extends Config
     Mapper<WritableComparable, Content, Text, ParseImpl>,
     Reducer<Text, Writable, Text, Writable> {
 
-  public static final Log LOG = LogFactory.getLog(Parser.class);
+  public static final Log LOG = LogFactory.getLog(ParseSegment.class);
   
   private ScoringFilters scfilters;
   
@@ -131,9 +132,11 @@ public class ParseSegment extends Config
 
   public void parse(Path segment) throws IOException {
 
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    long start = System.currentTimeMillis();
     if (LOG.isInfoEnabled()) {
-      LOG.info("Parse: starting");
-      LOG.info("Parse: segment: " + segment);
+      LOG.info("ParseSegment: starting at " + sdf.format(start));
+      LOG.info("ParseSegment: segment: " + segment);
     }
 
     JobConf job = new NutchJob(getConf());
@@ -151,7 +154,8 @@ public class ParseSegment extends Config
     job.setOutputValueClass(ParseImpl.class);
 
     JobClient.runJob(job);
-    if (LOG.isInfoEnabled()) { LOG.info("Parse: done"); }
+    long end = System.currentTimeMillis();
+    LOG.info("ParseSegment: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
   }
 
 

Modified: nutch/branches/branch-1.2/src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java?rev=960248&r1=960247&r2=960248&view=diff
==============================================================================
--- nutch/branches/branch-1.2/src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java (original)
+++ nutch/branches/branch-1.2/src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java Sat Jul  3 17:59:29 2010
@@ -19,6 +19,7 @@ package org.apache.nutch.scoring.webgrap
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
+import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
@@ -62,6 +63,7 @@ import org.apache.nutch.scoring.webgraph
 import org.apache.nutch.util.FSUtils;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
+import org.apache.nutch.util.TimingUtil;
 
 /**
  * The LinkDumper tool creates a database of node to inlink information that can
@@ -346,7 +348,9 @@ public class LinkDumper
   public void dumpLinks(Path webGraphDb)
     throws IOException {
 
-    LOG.info("NodeDumper: starting");
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    long start = System.currentTimeMillis();
+    LOG.info("LinkDumper: starting at " + sdf.format(start));
     Configuration conf = getConf();
     FileSystem fs = FileSystem.get(conf);
 
@@ -410,6 +414,8 @@ public class LinkDumper
     }
 
     fs.delete(tempInverted, true);
+    long end = System.currentTimeMillis();
+    LOG.info("LinkDumper: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
   }
 
   public static void main(String[] args)

Modified: nutch/branches/branch-1.2/src/java/org/apache/nutch/scoring/webgraph/LinkRank.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/src/java/org/apache/nutch/scoring/webgraph/LinkRank.java?rev=960248&r1=960247&r2=960248&view=diff
==============================================================================
--- nutch/branches/branch-1.2/src/java/org/apache/nutch/scoring/webgraph/LinkRank.java (original)
+++ nutch/branches/branch-1.2/src/java/org/apache/nutch/scoring/webgraph/LinkRank.java Sat Jul  3 17:59:29 2010
@@ -19,6 +19,7 @@ package org.apache.nutch.scoring.webgrap
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStreamReader;
+import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -64,6 +65,7 @@ import org.apache.nutch.scoring.webgraph
 import org.apache.nutch.util.FSUtils;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
+import org.apache.nutch.util.TimingUtil;
 import org.apache.nutch.util.URLUtil;
 
 public class LinkRank
@@ -579,6 +581,10 @@ public class LinkRank
   public void analyze(Path webGraphDb)
     throws IOException {
 
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    long start = System.currentTimeMillis();
+    LOG.info("Analysis: starting at " + sdf.format(start));
+
     // store the link rank under the webgraphdb temporarily, final scores get
     // upddated into the nodedb
     Path linkRank = new Path(webGraphDb, "linkrank");
@@ -606,8 +612,8 @@ public class LinkRank
     float rankOneScore = (1f / (float)numLinks);
 
     if (LOG.isInfoEnabled()) {
-      LOG.info("Number of links " + numLinks);
-      LOG.info("Rank One " + rankOneScore);
+      LOG.info("Analysis: Number of links: " + numLinks);
+      LOG.info("Analysis: Rank One: " + rankOneScore);
     }
 
     // run invert and analysis for a given number of iterations to allow the
@@ -616,7 +622,7 @@ public class LinkRank
     for (int i = 0; i < numIterations; i++) {
 
       // the input to inverting is always the previous output from analysis
-      LOG.info("Running iteration " + (i + 1) + " of " + numIterations);
+      LOG.info("Analysis: Starting iteration " + (i + 1) + " of " + numIterations);
       Path tempRank = new Path(linkRank + "-"
         + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
       fs.mkdirs(tempRank);
@@ -629,19 +635,20 @@ public class LinkRank
         rankOneScore);
 
       // replace the temporary NodeDb with the output from analysis
-      LOG.info("Installing new link scores");
+      LOG.info("Analysis: Installing new link scores");
       FSUtils.replace(fs, linkRank, tempRank, true);
-      LOG.info("Finished analysis iteration " + (i + 1) + " of "
+      LOG.info("Analysis: finished iteration " + (i + 1) + " of "
         + numIterations);
     }
 
     // replace the NodeDb in the WebGraph with the final output of analysis
-    LOG.info("Installing web graph nodes");
+    LOG.info("Analysis: Installing web graph nodes");
     FSUtils.replace(fs, wgNodeDb, nodeDb, true);
 
     // remove the temporary link rank folder
     fs.delete(linkRank, true);
-    LOG.info("Finished analysis");
+    long end = System.currentTimeMillis();
+    LOG.info("Analysis: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
   }
 
   public static void main(String[] args)

Modified: nutch/branches/branch-1.2/src/java/org/apache/nutch/scoring/webgraph/Loops.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/src/java/org/apache/nutch/scoring/webgraph/Loops.java?rev=960248&r1=960247&r2=960248&view=diff
==============================================================================
--- nutch/branches/branch-1.2/src/java/org/apache/nutch/scoring/webgraph/Loops.java (original)
+++ nutch/branches/branch-1.2/src/java/org/apache/nutch/scoring/webgraph/Loops.java Sat Jul  3 17:59:29 2010
@@ -19,6 +19,7 @@ package org.apache.nutch.scoring.webgrap
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
+import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -61,6 +62,7 @@ import org.apache.hadoop.util.ToolRunner
 import org.apache.nutch.util.FSUtils;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
+import org.apache.nutch.util.TimingUtil;
 
 /**
  * The Loops job identifies cycles of loops inside of the web graph. This is
@@ -466,8 +468,10 @@ public class Loops
   public void findLoops(Path webGraphDb)
     throws IOException {
 
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    long start = System.currentTimeMillis();
     if (LOG.isInfoEnabled()) {
-      LOG.info("Loops: starting");
+      LOG.info("Loops: starting at " + sdf.format(start));
       LOG.info("Loops: webgraphdb: " + webGraphDb);
     }
 
@@ -495,11 +499,11 @@ public class Loops
     init.setOutputFormat(SequenceFileOutputFormat.class);
 
     try {
-      LOG.info("Initializer: running");
+      LOG.info("Loops: starting initializer");
       JobClient.runJob(init);
-      LOG.info("Initializer: installing " + routes);
+      LOG.info("Loops: installing initializer " + routes);
       FSUtils.replace(fs, routes, tempRoute, true);
-      LOG.info("Initializer: finished");
+      LOG.info("Loops: finished initializer");
     }
     catch (IOException e) {
       LOG.error(StringUtils.stringifyException(e));
@@ -527,11 +531,11 @@ public class Loops
       looper.setBoolean("last", i == (depth - 1));
 
       try {
-        LOG.info("Looper: running");
+        LOG.info("Loops: starting looper");
         JobClient.runJob(looper);
-        LOG.info("Looper: installing " + routes);
+        LOG.info("Loops: installing looper " + routes);
         FSUtils.replace(fs, routes, tempRoute, true);
-        LOG.info("Looper: finished");
+        LOG.info("Loops: finished looper");
       }
       catch (IOException e) {
         LOG.error(StringUtils.stringifyException(e));
@@ -554,14 +558,16 @@ public class Loops
     finalizer.setOutputFormat(MapFileOutputFormat.class);
 
     try {
-      LOG.info("Finalizer: running");
+      LOG.info("Loops: starting finalizer");
       JobClient.runJob(finalizer);
-      LOG.info("Finalizer: finished");
+      LOG.info("Loops: finished finalizer");
     }
     catch (IOException e) {
       LOG.error(StringUtils.stringifyException(e));
       throw e;
     }
+    long end = System.currentTimeMillis();
+    LOG.info("Loops: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
   }
 
   public static void main(String[] args)

Modified: nutch/branches/branch-1.2/src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java?rev=960248&r1=960247&r2=960248&view=diff
==============================================================================
--- nutch/branches/branch-1.2/src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java (original)
+++ nutch/branches/branch-1.2/src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java Sat Jul  3 17:59:29 2010
@@ -17,6 +17,7 @@
 package org.apache.nutch.scoring.webgraph;
 
 import java.io.IOException;
+import java.text.SimpleDateFormat;
 import java.util.Iterator;
 
 import org.apache.commons.cli.CommandLine;
@@ -49,6 +50,7 @@ import org.apache.hadoop.util.Tool;
 import org.apache.hadoop.util.ToolRunner;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
+import org.apache.nutch.util.TimingUtil;
 
 /**
  * A tools that dumps out the top urls by number of inlinks, number of outlinks,
@@ -152,9 +154,6 @@ public class NodeDumper
    * 
    * @param webGraphDb The WebGraph from which to pull values.
    * 
-   * @param inlinks
-   * @param outlinks
-   * @param scores
    * @param topN
    * @param output
    * 
@@ -163,7 +162,9 @@ public class NodeDumper
   public void dumpNodes(Path webGraphDb, DumpType type, long topN, Path output)
     throws IOException {
 
-    LOG.info("NodeDumper: starting");
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    long start = System.currentTimeMillis();
+    LOG.info("NodeDumper: starting at " + sdf.format(start));
     Path nodeDb = new Path(webGraphDb, WebGraph.NODE_DIR);
     Configuration conf = getConf();
 
@@ -193,6 +194,8 @@ public class NodeDumper
       LOG.error(StringUtils.stringifyException(e));
       throw e;
     }
+    long end = System.currentTimeMillis();
+    LOG.info("NodeDumper: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
   }
 
   public static void main(String[] args)

Modified: nutch/branches/branch-1.2/src/java/org/apache/nutch/scoring/webgraph/ScoreUpdater.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/src/java/org/apache/nutch/scoring/webgraph/ScoreUpdater.java?rev=960248&r1=960247&r2=960248&view=diff
==============================================================================
--- nutch/branches/branch-1.2/src/java/org/apache/nutch/scoring/webgraph/ScoreUpdater.java (original)
+++ nutch/branches/branch-1.2/src/java/org/apache/nutch/scoring/webgraph/ScoreUpdater.java Sat Jul  3 17:59:29 2010
@@ -17,6 +17,7 @@
 package org.apache.nutch.scoring.webgraph;
 
 import java.io.IOException;
+import java.text.SimpleDateFormat;
 import java.util.Iterator;
 import java.util.Random;
 
@@ -53,6 +54,7 @@ import org.apache.nutch.crawl.CrawlDatum
 import org.apache.nutch.crawl.CrawlDb;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
+import org.apache.nutch.util.TimingUtil;
 
 /**
  * Updates the score from the WebGraph node database into the crawl database.
@@ -151,6 +153,10 @@ public class ScoreUpdater
   public void update(Path crawlDb, Path webGraphDb)
     throws IOException {
 
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    long start = System.currentTimeMillis();
+    LOG.info("ScoreUpdater: starting at " + sdf.format(start));
+
     Configuration conf = getConf();
     FileSystem fs = FileSystem.get(conf);
 
@@ -190,8 +196,11 @@ public class ScoreUpdater
     }
 
     // install the temp crawl database
-    LOG.info("Installing new crawldb " + crawlDb);
+    LOG.info("ScoreUpdater: installing new crawldb " + crawlDb);
     CrawlDb.install(updater, crawlDb);
+
+    long end = System.currentTimeMillis();
+    LOG.info("ScoreUpdater: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
   }
 
   public static void main(String[] args)

Modified: nutch/branches/branch-1.2/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java?rev=960248&r1=960247&r2=960248&view=diff
==============================================================================
--- nutch/branches/branch-1.2/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java (original)
+++ nutch/branches/branch-1.2/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java Sat Jul  3 17:59:29 2010
@@ -17,6 +17,7 @@
 package org.apache.nutch.scoring.webgraph;
 
 import java.io.IOException;
+import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -63,6 +64,7 @@ import org.apache.nutch.util.FSUtils;
 import org.apache.nutch.util.LockUtil;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
+import org.apache.nutch.util.TimingUtil;
 import org.apache.nutch.util.URLUtil;
 
 /**
@@ -437,8 +439,10 @@ public class WebGraph
   public void createWebGraph(Path webGraphDb, Path[] segments)
     throws IOException {
 
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    long start = System.currentTimeMillis();
     if (LOG.isInfoEnabled()) {
-      LOG.info("WebGraphDb: starting");
+      LOG.info("WebGraphDb: starting at " + sdf.format(start));
       LOG.info("WebGraphDb: webgraphdb: " + webGraphDb);
     }
 
@@ -590,6 +594,9 @@ public class WebGraph
 
     // remove the lock file for the webgraph
     LockUtil.removeLockFile(fs, lock);
+
+    long end = System.currentTimeMillis();
+    LOG.info("WebGraphDb: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
   }
 
   public static void main(String[] args)

Modified: nutch/branches/branch-1.2/src/java/org/apache/nutch/tools/CrawlDBScanner.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/src/java/org/apache/nutch/tools/CrawlDBScanner.java?rev=960248&r1=960247&r2=960248&view=diff
==============================================================================
--- nutch/branches/branch-1.2/src/java/org/apache/nutch/tools/CrawlDBScanner.java (original)
+++ nutch/branches/branch-1.2/src/java/org/apache/nutch/tools/CrawlDBScanner.java Sat Jul  3 17:59:29 2010
@@ -17,6 +17,7 @@
 package org.apache.nutch.tools;
 
 import java.io.IOException;
+import java.text.SimpleDateFormat;
 import java.util.Iterator;
 
 import org.apache.commons.logging.Log;
@@ -43,6 +44,7 @@ import org.apache.nutch.crawl.CrawlDatum
 import org.apache.nutch.crawl.CrawlDb;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
+import org.apache.nutch.util.TimingUtil;
 
 /**
  * Dumps all the entries matching a regular expression on their URL. Generates a
@@ -99,6 +101,10 @@ public class CrawlDBScanner extends Conf
   private void scan(Path crawlDb, Path outputPath, String regex, String status,
       boolean text) throws IOException {
 
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    long start = System.currentTimeMillis();
+    LOG.info("CrawlDB scanner: starting at " + sdf.format(start));
+
     JobConf job = new NutchJob(getConf());
 
     job.setJobName("Scan : " + crawlDb + " for URLS matching : " + regex);
@@ -139,6 +145,9 @@ public class CrawlDBScanner extends Conf
     } catch (IOException e) {
       throw e;
     }
+
+    long end = System.currentTimeMillis();
+    LOG.info("CrawlDb scanner: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
   }
 
   public static void main(String args[]) throws Exception {

Modified: nutch/branches/branch-1.2/src/java/org/apache/nutch/tools/FreeGenerator.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/src/java/org/apache/nutch/tools/FreeGenerator.java?rev=960248&r1=960247&r2=960248&view=diff
==============================================================================
--- nutch/branches/branch-1.2/src/java/org/apache/nutch/tools/FreeGenerator.java (original)
+++ nutch/branches/branch-1.2/src/java/org/apache/nutch/tools/FreeGenerator.java Sat Jul  3 17:59:29 2010
@@ -18,6 +18,7 @@
 package org.apache.nutch.tools;
 
 import java.io.IOException;
+import java.text.SimpleDateFormat;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.Map.Entry;
@@ -50,6 +51,7 @@ import org.apache.nutch.net.URLNormalize
 import org.apache.nutch.scoring.ScoringFilters;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
+import org.apache.nutch.util.TimingUtil;
 
 /**
  * This tool generates fetchlists (segments to be fetched) from plain text
@@ -157,6 +159,10 @@ public class FreeGenerator extends Confi
       }
     }
     
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    long start = System.currentTimeMillis();
+    LOG.info("FreeGenerator: starting at " + sdf.format(start));
+
     JobConf job = new NutchJob(getConf());
     job.setBoolean(FILTER_KEY, filter);
     job.setBoolean(NORMALIZE_KEY, normalize);
@@ -177,11 +183,13 @@ public class FreeGenerator extends Confi
         new Path(segName, CrawlDatum.GENERATE_DIR_NAME)));
     try {
       JobClient.runJob(job);
-      return 0;
     } catch (Exception e) {
       LOG.fatal("FAILED: " + StringUtils.stringifyException(e));
       return -1;
     }
+    long end = System.currentTimeMillis();
+    LOG.info("FreeGenerator: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    return 0;
   }
 
   public static void main(String[] args) throws Exception {

Modified: nutch/branches/branch-1.2/src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java?rev=960248&r1=960247&r2=960248&view=diff
==============================================================================
--- nutch/branches/branch-1.2/src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java (original)
+++ nutch/branches/branch-1.2/src/java/org/apache/nutch/tools/arc/ArcSegmentCreator.java Sat Jul  3 17:59:29 2010
@@ -59,6 +59,7 @@ import org.apache.nutch.util.LogUtil;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
 import org.apache.nutch.util.StringUtil;
+import org.apache.nutch.util.TimingUtil;
 
 /**
  * <p>The <code>ArcSegmentCreator</code> is a replacement for fetcher that will
@@ -346,8 +347,10 @@ public class ArcSegmentCreator
   public void createSegments(Path arcFiles, Path segmentsOutDir)
     throws IOException {
 
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    long start = System.currentTimeMillis();
     if (LOG.isInfoEnabled()) {
-      LOG.info("ArcSegmentCreator: starting");
+      LOG.info("ArcSegmentCreator: starting at " + sdf.format(start));
       LOG.info("ArcSegmentCreator: arc files dir: " + arcFiles);
     }
 
@@ -364,9 +367,9 @@ public class ArcSegmentCreator
     job.setOutputValueClass(NutchWritable.class);
 
     JobClient.runJob(job);
-    if (LOG.isInfoEnabled()) {
-      LOG.info("ArcSegmentCreator: done");
-    }
+
+    long end = System.currentTimeMillis();
+    LOG.info("ArcSegmentCreator: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
   }
 
   public static void main(String args[])

Modified: nutch/branches/branch-1.2/src/java/org/apache/nutch/tools/compat/CrawlDbConverter.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/src/java/org/apache/nutch/tools/compat/CrawlDbConverter.java?rev=960248&r1=960247&r2=960248&view=diff
==============================================================================
--- nutch/branches/branch-1.2/src/java/org/apache/nutch/tools/compat/CrawlDbConverter.java (original)
+++ nutch/branches/branch-1.2/src/java/org/apache/nutch/tools/compat/CrawlDbConverter.java Sat Jul  3 17:59:29 2010
@@ -18,6 +18,7 @@
 package org.apache.nutch.tools.compat;
 
 import java.io.IOException;
+import java.text.SimpleDateFormat;
 import java.util.Iterator;
 import java.util.Random;
 
@@ -47,6 +48,7 @@ import org.apache.nutch.crawl.CrawlDatum
 import org.apache.nutch.crawl.CrawlDb;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
+import org.apache.nutch.util.TimingUtil;
 
 /**
  * This tool converts CrawlDb created in old &lt;UTF8, CrawlDatum&gt; format
@@ -129,6 +131,10 @@ public class CrawlDbConverter extends Co
     if (args.length > 2 && args[2].equalsIgnoreCase("-withMetadata"))
       withMetadata = true;
     
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    long start = System.currentTimeMillis();
+    LOG.info("CrawlDbConverter: starting at " + sdf.format(start));
+
     job.setBoolean(CONVERT_META_KEY, withMetadata);
     FileInputFormat.addInputPath(job, oldDb);
     job.setInputFormat(SequenceFileInputFormat.class);
@@ -140,10 +146,13 @@ public class CrawlDbConverter extends Co
     try {
       JobClient.runJob(job);
       CrawlDb.install(job, new Path(args[1]));
-      return 0;
     } catch (Exception e) {
       LOG.fatal("Error: " + StringUtils.stringifyException(e));
       return -1;
     }
+
+    long end = System.currentTimeMillis();
+    LOG.info("CrawlDb scanner: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    return 0;
   }
 }

Modified: nutch/branches/branch-1.2/src/java/org/apache/nutch/tools/compat/ReprUrlFixer.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/src/java/org/apache/nutch/tools/compat/ReprUrlFixer.java?rev=960248&r1=960247&r2=960248&view=diff
==============================================================================
--- nutch/branches/branch-1.2/src/java/org/apache/nutch/tools/compat/ReprUrlFixer.java (original)
+++ nutch/branches/branch-1.2/src/java/org/apache/nutch/tools/compat/ReprUrlFixer.java Sat Jul  3 17:59:29 2010
@@ -19,6 +19,7 @@ package org.apache.nutch.tools.compat;
 import java.io.IOException;
 import java.net.MalformedURLException;
 import java.net.URL;
+import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
@@ -59,6 +60,7 @@ import org.apache.nutch.scoring.webgraph
 import org.apache.nutch.util.FSUtils;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
+import org.apache.nutch.util.TimingUtil;
 import org.apache.nutch.util.URLUtil;
 
 /**
@@ -157,13 +159,17 @@ public class ReprUrlFixer
   public void update(Path crawlDb, Path[] segments)
     throws IOException {
 
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    long start = System.currentTimeMillis();
+    LOG.info("ReprUrlFixer: starting at " + sdf.format(start));
+
     Configuration conf = getConf();
     FileSystem fs = FileSystem.get(conf);
 
     // run the crawl database through the repr fixer
     if (crawlDb != null) {
 
-      LOG.info("Running ReprUtilFixer " + crawlDb);
+      LOG.info("ReprUrlFixer: crawlDb " + crawlDb);
       Path crawlDbCurrent = new Path(crawlDb, CrawlDb.CURRENT_NAME);
       Path newCrawlDb = new Path(crawlDb,
         Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
@@ -180,7 +186,7 @@ public class ReprUrlFixer
 
       try {
         JobClient.runJob(updater);
-        LOG.info("Installing new crawldb " + crawlDb);
+        LOG.info("ReprUrlFixer: installing new crawldb " + crawlDb);
         CrawlDb.install(updater, crawlDb);
       }
       catch (IOException e) {
@@ -196,13 +202,13 @@ public class ReprUrlFixer
       for (int i = 0; i < segments.length; i++) {
 
         Path segment = segments[i];
-        LOG.info("Running ReprUtilFixer " + segment + " fetch");
+        LOG.info("ReprUrlFixer: fetching segment " + segment);
         Path segFetch = new Path(segment, CrawlDatum.FETCH_DIR_NAME);
         Path newSegFetch = new Path(segment, CrawlDatum.FETCH_DIR_NAME + "-"
           + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
 
         JobConf fetch = new NutchJob(conf);
-        fetch.setJobName("ReprUtilFixer: " + segment.toString());
+        fetch.setJobName("ReprUrlFixer: " + segment.toString());
         FileInputFormat.addInputPath(fetch, segFetch);
         FileOutputFormat.setOutputPath(fetch, newSegFetch);
         fetch.setInputFormat(SequenceFileInputFormat.class);
@@ -213,7 +219,7 @@ public class ReprUrlFixer
 
         try {
           JobClient.runJob(fetch);
-          LOG.info("Installing new segment fetch directory " + newSegFetch);
+          LOG.info("ReprUrlFixer: installing new segment fetch directory " + newSegFetch);
           FSUtils.replace(fs, segFetch, newSegFetch, true);
           LOG.info("ReprUrlFixer: finished installing segment fetch directory");
         }
@@ -222,13 +228,13 @@ public class ReprUrlFixer
           throw e;
         }
 
-        LOG.info("Running ReprUtilFixer " + segment + " parse");
+        LOG.info("ReprUrlFixer: parsing segment " + segment);
         Path segParse = new Path(segment, CrawlDatum.PARSE_DIR_NAME);
         Path newSegParse = new Path(segment, CrawlDatum.PARSE_DIR_NAME + "-"
           + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
 
         JobConf parse = new NutchJob(conf);
-        parse.setJobName("ReprUtilFixer: " + segment.toString());
+        parse.setJobName("ReprUrlFixer: " + segment.toString());
         FileInputFormat.addInputPath(parse, segParse);
         FileOutputFormat.setOutputPath(parse, newSegParse);
         parse.setInputFormat(SequenceFileInputFormat.class);
@@ -239,7 +245,7 @@ public class ReprUrlFixer
 
         try {
           JobClient.runJob(parse);
-          LOG.info("Installing new segment parse directry " + newSegParse);
+          LOG.info("ReprUrlFixer: installing new segment parse directry " + newSegParse);
           FSUtils.replace(fs, segParse, newSegParse, true);
           LOG.info("ReprUrlFixer: finished installing segment parse directory");
         }
@@ -249,6 +255,9 @@ public class ReprUrlFixer
         }
       }
     }
+
+    long end = System.currentTimeMillis();
+    LOG.info("ReprUrlFixer: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
   }
 
   /**

Added: nutch/branches/branch-1.2/src/java/org/apache/nutch/util/TimingUtil.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/src/java/org/apache/nutch/util/TimingUtil.java?rev=960248&view=auto
==============================================================================
--- nutch/branches/branch-1.2/src/java/org/apache/nutch/util/TimingUtil.java (added)
+++ nutch/branches/branch-1.2/src/java/org/apache/nutch/util/TimingUtil.java Sat Jul  3 17:59:29 2010
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nutch.util;
+
+import java.text.NumberFormat;
+
+public class TimingUtil {
+    // Milliseconds per hour, per minute and per second, ordered from the largest unit to the smallest.
+    private static long[] TIME_FACTOR = { 60 * 60 * 1000, 60 * 1000, 1000 };
+
+    /**
+     * Calculate the elapsed time between two times specified in milliseconds.
+     * @param start The start of the time period
+     * @param end The end of the time period
+     * @return the elapsed time as colon-separated hours, minutes and seconds, each padded to at least two digits and with leftover milliseconds dropped (e.g. "01:02:03"), or null if start > end.
+     */
+    public static String elapsedTime(long start, long end){
+        if (start > end) {
+            return null;
+        }
+
+        long[] elapsedTime = new long[TIME_FACTOR.length];
+        // Peel off whole hours, then minutes, then seconds, advancing start past each; the guard above keeps start <= end, so -1 is never stored.
+        for (int i = 0; i < TIME_FACTOR.length; i++) {
+            elapsedTime[i] = start > end ? -1 : (end - start) / TIME_FACTOR[i];
+            start += TIME_FACTOR[i] * elapsedTime[i];
+        }
+        // Format each component with the default-locale NumberFormat (minimum two integer digits) and join them with ':'.
+        NumberFormat nf = NumberFormat.getInstance();
+        nf.setMinimumIntegerDigits(2);
+        StringBuffer buf = new StringBuffer();
+        for (int i = 0; i < elapsedTime.length; i++) {
+            if (i > 0) {
+                buf.append(":");
+            }
+            buf.append(nf.format(elapsedTime[i]));
+        }
+        return buf.toString();
+    }
+}

Propchange: nutch/branches/branch-1.2/src/java/org/apache/nutch/util/TimingUtil.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: nutch/branches/branch-1.2/src/java/org/apache/nutch/util/domain/DomainStatistics.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.2/src/java/org/apache/nutch/util/domain/DomainStatistics.java?rev=960248&r1=960247&r2=960248&view=diff
==============================================================================
--- nutch/branches/branch-1.2/src/java/org/apache/nutch/util/domain/DomainStatistics.java (original)
+++ nutch/branches/branch-1.2/src/java/org/apache/nutch/util/domain/DomainStatistics.java Sat Jul  3 17:59:29 2010
@@ -19,6 +19,7 @@ package org.apache.nutch.util.domain;
 
 import java.io.IOException;
 import java.net.URL;
+import java.text.SimpleDateFormat;
 import java.util.Iterator;
 
 import org.apache.commons.logging.Log;
@@ -43,6 +44,7 @@ import org.apache.hadoop.util.ToolRunner
 import org.apache.nutch.crawl.CrawlDatum;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
+import org.apache.nutch.util.TimingUtil;
 import org.apache.nutch.util.URLUtil;
 
 /**
@@ -70,7 +72,7 @@ implements Tool, Mapper<Text, CrawlDatum
   
   public int run(String[] args) throws IOException {
     if (args.length < 3) {
-      System.out.println("usage: inputDirs outDir host|domain|suffix [numOfReducer]");
+      System.out.println("usage: DomainStatistics inputDirs outDir host|domain|suffix [numOfReducer]");
       return 1;
     }
     String inputDir = args[0];
@@ -81,6 +83,10 @@ implements Tool, Mapper<Text, CrawlDatum
       numOfReducers = Integer.parseInt(args[3]);
     }
 
+    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    long start = System.currentTimeMillis();
+    LOG.info("DomainStatistics: starting at " + sdf.format(start));
+
     JobConf job = new NutchJob(getConf());
     job.setJobName("Domain statistics");
 
@@ -112,6 +118,8 @@ implements Tool, Mapper<Text, CrawlDatum
     
     JobClient.runJob(job);
     
+    long end = System.currentTimeMillis();
+    LOG.info("DomainStatistics: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
     return 0;
   }
 
@@ -122,12 +130,10 @@ implements Tool, Mapper<Text, CrawlDatum
   }
   
 
-  @Override
   public Configuration getConf() {
     return conf;
   }
 
-  @Override
   public void setConf(Configuration conf) {
     this.conf = conf;
   }