You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2015/01/29 06:39:03 UTC
svn commit: r1655526 [9/26] - in /nutch/trunk: ./ src/java/org/apache/nutch/crawl/ src/java/org/apache/nutch/fetcher/ src/java/org/apache/nutch/indexer/ src/java/org/apache/nutch/metadata/ src/java/org/apache/nutch/net/ src/java/org/apache/nutch/net/pr...

Modified: nutch/trunk/src/java/org/apache/nutch/scoring/ScoringFilterException.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/scoring/ScoringFilterException.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/scoring/ScoringFilterException.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/scoring/ScoringFilterException.java Thu Jan 29 05:38:59 2015
@@ -1,19 +1,19 @@
 /*
-* Licensed to the Apache Software Foundation (ASF) under one or more
-* contributor license agreements.  See the NOTICE file distributed with
-* this work for additional information regarding copyright ownership.
-* The ASF licenses this file to You under the Apache License, Version 2.0
-* (the "License"); you may not use this file except in compliance with
-* the License.  You may obtain a copy of the License at
-*
-*     http://www.apache.org/licenses/LICENSE-2.0
-*
-* Unless required by applicable law or agreed to in writing, software
-* distributed under the License is distributed on an "AS IS" BASIS,
-* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-* See the License for the specific language governing permissions and
-* limitations under the License.
-*/
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
 package org.apache.nutch.scoring;
 
 /**

Modified: nutch/trunk/src/java/org/apache/nutch/scoring/ScoringFilters.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/scoring/ScoringFilters.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/scoring/ScoringFilters.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/scoring/ScoringFilters.java Thu Jan 29 05:38:59 2015
@@ -49,7 +49,8 @@ public class ScoringFilters extends Conf
   }
 
   /** Calculate a sort value for Generate. */
-  public float generatorSortValue(Text url, CrawlDatum datum, float initSort) throws ScoringFilterException {
+  public float generatorSortValue(Text url, CrawlDatum datum, float initSort)
+      throws ScoringFilterException {
     for (int i = 0; i < this.filters.length; i++) {
       initSort = this.filters[i].generatorSortValue(url, datum, initSort);
     }
@@ -57,48 +58,59 @@ public class ScoringFilters extends Conf
   }
 
   /** Calculate a new initial score, used when adding newly discovered pages. */
-  public void initialScore(Text url, CrawlDatum datum) throws ScoringFilterException {
+  public void initialScore(Text url, CrawlDatum datum)
+      throws ScoringFilterException {
     for (int i = 0; i < this.filters.length; i++) {
       this.filters[i].initialScore(url, datum);
     }
   }
 
   /** Calculate a new initial score, used when injecting new pages. */
-  public void injectedScore(Text url, CrawlDatum datum) throws ScoringFilterException {
+  public void injectedScore(Text url, CrawlDatum datum)
+      throws ScoringFilterException {
     for (int i = 0; i < this.filters.length; i++) {
       this.filters[i].injectedScore(url, datum);
     }
   }
 
   /** Calculate updated page score during CrawlDb.update(). */
-  public void updateDbScore(Text url, CrawlDatum old, CrawlDatum datum, List<CrawlDatum> inlinked) throws ScoringFilterException {
+  public void updateDbScore(Text url, CrawlDatum old, CrawlDatum datum,
+      List<CrawlDatum> inlinked) throws ScoringFilterException {
     for (int i = 0; i < this.filters.length; i++) {
       this.filters[i].updateDbScore(url, old, datum, inlinked);
     }
   }
 
-  public void passScoreBeforeParsing(Text url, CrawlDatum datum, Content content) throws ScoringFilterException {
+  public void passScoreBeforeParsing(Text url, CrawlDatum datum, Content content)
+      throws ScoringFilterException {
     for (int i = 0; i < this.filters.length; i++) {
       this.filters[i].passScoreBeforeParsing(url, datum, content);
     }
   }
-  
-  public void passScoreAfterParsing(Text url, Content content, Parse parse) throws ScoringFilterException {
+
+  public void passScoreAfterParsing(Text url, Content content, Parse parse)
+      throws ScoringFilterException {
     for (int i = 0; i < this.filters.length; i++) {
       this.filters[i].passScoreAfterParsing(url, content, parse);
     }
   }
-  
-  public CrawlDatum distributeScoreToOutlinks(Text fromUrl, ParseData parseData, Collection<Entry<Text, CrawlDatum>> targets, CrawlDatum adjust, int allCount) throws ScoringFilterException {
+
+  public CrawlDatum distributeScoreToOutlinks(Text fromUrl,
+      ParseData parseData, Collection<Entry<Text, CrawlDatum>> targets,
+      CrawlDatum adjust, int allCount) throws ScoringFilterException {
     for (int i = 0; i < this.filters.length; i++) {
-      adjust = this.filters[i].distributeScoreToOutlinks(fromUrl, parseData, targets, adjust, allCount);
+      adjust = this.filters[i].distributeScoreToOutlinks(fromUrl, parseData,
+          targets, adjust, allCount);
     }
     return adjust;
   }
 
-  public float indexerScore(Text url, NutchDocument doc, CrawlDatum dbDatum, CrawlDatum fetchDatum, Parse parse, Inlinks inlinks, float initScore) throws ScoringFilterException {
+  public float indexerScore(Text url, NutchDocument doc, CrawlDatum dbDatum,
+      CrawlDatum fetchDatum, Parse parse, Inlinks inlinks, float initScore)
+      throws ScoringFilterException {
     for (int i = 0; i < this.filters.length; i++) {
-      initScore = this.filters[i].indexerScore(url, doc, dbDatum, fetchDatum, parse, inlinks, initScore);
+      initScore = this.filters[i].indexerScore(url, doc, dbDatum, fetchDatum,
+          parse, inlinks, initScore);
     }
     return initScore;
   }

Modified: nutch/trunk/src/java/org/apache/nutch/scoring/package-info.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/scoring/package-info.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/scoring/package-info.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/scoring/package-info.java Thu Jan 29 05:38:59 2015
@@ -19,3 +19,4 @@
  * The {@link org.apache.nutch.scoring.ScoringFilter ScoringFilter} interface.
  */
 package org.apache.nutch.scoring;
+

Modified: nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/LinkDatum.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/LinkDatum.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/LinkDatum.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/LinkDatum.java Thu Jan 29 05:38:59 2015
@@ -27,8 +27,7 @@ import org.apache.hadoop.io.Writable;
  * A class for holding link information including the url, anchor text, a score,
  * the timestamp of the link and a link type.
  */
-public class LinkDatum
-  implements Writable {
+public class LinkDatum implements Writable {
 
   public final static byte INLINK = 1;
   public final static byte OUTLINK = 2;
@@ -49,7 +48,8 @@ public class LinkDatum
   /**
    * Creates a LinkDatum with a given url. Timestamp is set to current time.
    * 
-   * @param url The link url.
+   * @param url
+   *          The link url.
    */
   public LinkDatum(String url) {
     this(url, "", System.currentTimeMillis());
@@ -59,8 +59,10 @@ public class LinkDatum
    * Creates a LinkDatum with a url and an anchor text. Timestamp is set to
    * current time.
    * 
-   * @param url The link url.
-   * @param anchor The link anchor text.
+   * @param url
+   *          The link url.
+   * @param anchor
+   *          The link anchor text.
    */
   public LinkDatum(String url, String anchor) {
     this(url, anchor, System.currentTimeMillis());
@@ -112,8 +114,7 @@ public class LinkDatum
     this.linkType = linkType;
   }
 
-  public void readFields(DataInput in)
-    throws IOException {
+  public void readFields(DataInput in) throws IOException {
     url = Text.readString(in);
     anchor = Text.readString(in);
     score = in.readFloat();
@@ -121,8 +122,7 @@ public class LinkDatum
     linkType = in.readByte();
   }
 
-  public void write(DataOutput out)
-    throws IOException {
+  public void write(DataOutput out) throws IOException {
     Text.writeString(out, url);
     Text.writeString(out, anchor != null ? anchor : "");
     out.writeFloat(score);
@@ -132,9 +132,9 @@ public class LinkDatum
 
   public String toString() {
 
-    String type = (linkType == INLINK ? "inlink" : (linkType == OUTLINK)
-      ? "outlink" : "unknown");
+    String type = (linkType == INLINK ? "inlink"
+        : (linkType == OUTLINK) ? "outlink" : "unknown");
     return "url: " + url + ", anchor: " + anchor + ", score: " + score
-      + ", timestamp: " + timestamp + ", link type: " + type;
+        + ", timestamp: " + timestamp + ", link type: " + type;
   }
 }

Modified: nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java Thu Jan 29 05:38:59 2015
@@ -67,27 +67,24 @@ import org.apache.nutch.util.TimingUtil;
 
 /**
  * The LinkDumper tool creates a database of node to inlink information that can
- * be read using the nested Reader class.  This allows the inlink and scoring 
- * state of a single url to be reviewed quickly to determine why a given url is 
- * ranking a certain way.  This tool is to be used with the LinkRank analysis.
+ * be read using the nested Reader class. This allows the inlink and scoring
+ * state of a single url to be reviewed quickly to determine why a given url is
+ * ranking a certain way. This tool is to be used with the LinkRank analysis.
  */
-public class LinkDumper
-  extends Configured
-  implements Tool {
+public class LinkDumper extends Configured implements Tool {
 
   public static final Logger LOG = LoggerFactory.getLogger(LinkDumper.class);
   public static final String DUMP_DIR = "linkdump";
 
   /**
-   * Reader class which will print out the url and all of its inlinks to system 
-   * out.  Each inlinkwill be displayed with its node information including 
-   * score and number of in and outlinks.
+   * Reader class which will print out the url and all of its inlinks to system
+   * out. Each inlink will be displayed with its node information including
+   * score and number of in and outlinks.
    */
   public static class Reader {
 
-    public static void main(String[] args)
-      throws Exception {
-      
+    public static void main(String[] args) throws Exception {
+
       if (args == null || args.length < 2) {
         System.out.println("LinkDumper$Reader usage: <webgraphdb> <url>");
         return;
@@ -99,20 +96,20 @@ public class LinkDumper
       Path webGraphDb = new Path(args[0]);
       String url = args[1];
       MapFile.Reader[] readers = MapFileOutputFormat.getReaders(fs, new Path(
-        webGraphDb, DUMP_DIR), conf);
+          webGraphDb, DUMP_DIR), conf);
 
       // get the link nodes for the url
       Text key = new Text(url);
       LinkNodes nodes = new LinkNodes();
       MapFileOutputFormat.getEntry(readers,
-        new HashPartitioner<Text, LinkNodes>(), key, nodes);
+          new HashPartitioner<Text, LinkNodes>(), key, nodes);
 
       // print out the link nodes
       LinkNode[] linkNodesAr = nodes.getLinks();
       System.out.println(url + ":");
       for (LinkNode node : linkNodesAr) {
         System.out.println("  " + node.getUrl() + " - "
-          + node.getNode().toString());
+            + node.getNode().toString());
       }
 
       // close the readers
@@ -123,8 +120,7 @@ public class LinkDumper
   /**
    * Bean class which holds url to node information.
    */
-  public static class LinkNode
-    implements Writable {
+  public static class LinkNode implements Writable {
 
     private String url = null;
     private Node node = null;
@@ -154,15 +150,13 @@ public class LinkDumper
       this.node = node;
     }
 
-    public void readFields(DataInput in)
-      throws IOException {
+    public void readFields(DataInput in) throws IOException {
       url = in.readUTF();
       node = new Node();
       node.readFields(in);
     }
 
-    public void write(DataOutput out)
-      throws IOException {
+    public void write(DataOutput out) throws IOException {
       out.writeUTF(url);
       node.write(out);
     }
@@ -172,8 +166,7 @@ public class LinkDumper
   /**
    * Writable class which holds an array of LinkNode objects.
    */
-  public static class LinkNodes
-    implements Writable {
+  public static class LinkNodes implements Writable {
 
     private LinkNode[] links;
 
@@ -193,8 +186,7 @@ public class LinkDumper
       this.links = links;
     }
 
-    public void readFields(DataInput in)
-      throws IOException {
+    public void readFields(DataInput in) throws IOException {
       int numLinks = in.readInt();
       if (numLinks > 0) {
         links = new LinkNode[numLinks];
@@ -206,8 +198,7 @@ public class LinkDumper
       }
     }
 
-    public void write(DataOutput out)
-      throws IOException {
+    public void write(DataOutput out) throws IOException {
       if (links != null && links.length > 0) {
         int numLinks = links.length;
         out.writeInt(numLinks);
@@ -222,9 +213,9 @@ public class LinkDumper
    * Inverts outlinks from the WebGraph to inlinks and attaches node
    * information.
    */
-  public static class Inverter
-    implements Mapper<Text, Writable, Text, ObjectWritable>,
-    Reducer<Text, ObjectWritable, Text, LinkNode> {
+  public static class Inverter implements
+      Mapper<Text, Writable, Text, ObjectWritable>,
+      Reducer<Text, ObjectWritable, Text, LinkNode> {
 
     private JobConf conf;
 
@@ -236,8 +227,8 @@ public class LinkDumper
      * Wraps all values in ObjectWritables.
      */
     public void map(Text key, Writable value,
-      OutputCollector<Text, ObjectWritable> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, ObjectWritable> output, Reporter reporter)
+        throws IOException {
 
       ObjectWritable objWrite = new ObjectWritable();
       objWrite.set(value);
@@ -245,12 +236,12 @@ public class LinkDumper
     }
 
     /**
-     * Inverts outlinks to inlinks while attaching node information to the 
+     * Inverts outlinks to inlinks while attaching node information to the
      * outlink.
      */
     public void reduce(Text key, Iterator<ObjectWritable> values,
-      OutputCollector<Text, LinkNode> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, LinkNode> output, Reporter reporter)
+        throws IOException {
 
       String fromUrl = key.toString();
       List<LinkDatum> outlinks = new ArrayList<LinkDatum>();
@@ -262,13 +253,11 @@ public class LinkDumper
         ObjectWritable write = values.next();
         Object obj = write.get();
         if (obj instanceof Node) {
-          node = (Node)obj;
-        }
-        else if (obj instanceof LinkDatum) {
-          outlinks.add(WritableUtils.clone((LinkDatum)obj, conf));
-        }
-        else if (obj instanceof LoopSet) {
-          loops = (LoopSet)obj;
+          node = (Node) obj;
+        } else if (obj instanceof LinkDatum) {
+          outlinks.add(WritableUtils.clone((LinkDatum) obj, conf));
+        } else if (obj instanceof LoopSet) {
+          loops = (LoopSet) obj;
         }
       }
 
@@ -280,13 +269,13 @@ public class LinkDumper
         for (int i = 0; i < outlinks.size(); i++) {
           LinkDatum outlink = outlinks.get(i);
           String toUrl = outlink.getUrl();
-          
+
           // remove any url that is in the loopset, same as LinkRank
           if (loopSet != null && loopSet.contains(toUrl)) {
             continue;
           }
-          
-          // collect the outlink as an inlink with the node 
+
+          // collect the outlink as an inlink with the node
           output.collect(new Text(toUrl), new LinkNode(fromUrl, node));
         }
       }
@@ -297,11 +286,11 @@ public class LinkDumper
   }
 
   /**
-   * Merges LinkNode objects into a single array value per url.  This allows 
-   * all values to be quickly retrieved and printed via the Reader tool.
+   * Merges LinkNode objects into a single array value per url. This allows all
+   * values to be quickly retrieved and printed via the Reader tool.
    */
-  public static class Merger
-    implements Reducer<Text, LinkNode, Text, LinkNodes> {
+  public static class Merger implements
+      Reducer<Text, LinkNode, Text, LinkNodes> {
 
     private JobConf conf;
     private int maxInlinks = 50000;
@@ -314,8 +303,8 @@ public class LinkDumper
      * Aggregate all LinkNode objects for a given url.
      */
     public void reduce(Text key, Iterator<LinkNode> values,
-      OutputCollector<Text, LinkNodes> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, LinkNodes> output, Reporter reporter)
+        throws IOException {
 
       List<LinkNode> nodeList = new ArrayList<LinkNode>();
       int numNodes = 0;
@@ -325,8 +314,7 @@ public class LinkDumper
         if (numNodes < maxInlinks) {
           nodeList.add(WritableUtils.clone(cur, conf));
           numNodes++;
-        }
-        else {
+        } else {
           break;
         }
       }
@@ -342,11 +330,10 @@ public class LinkDumper
   }
 
   /**
-   * Runs the inverter and merger jobs of the LinkDumper tool to create the 
-   * url to inlink node database.
+   * Runs the inverter and merger jobs of the LinkDumper tool to create the url
+   * to inlink node database.
    */
-  public void dumpLinks(Path webGraphDb)
-    throws IOException {
+  public void dumpLinks(Path webGraphDb) throws IOException {
 
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
@@ -362,7 +349,7 @@ public class LinkDumper
 
     // run the inverter job
     Path tempInverted = new Path(webGraphDb, "inverted-"
-      + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
+        + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
     JobConf inverter = new NutchJob(conf);
     inverter.setJobName("LinkDumper: inverter");
     FileInputFormat.addInputPath(inverter, nodeDb);
@@ -384,8 +371,7 @@ public class LinkDumper
       LOG.info("LinkDumper: running inverter");
       JobClient.runJob(inverter);
       LOG.info("LinkDumper: finished inverter");
-    }
-    catch (IOException e) {
+    } catch (IOException e) {
       LOG.error(StringUtils.stringifyException(e));
       throw e;
     }
@@ -407,43 +393,41 @@ public class LinkDumper
       LOG.info("LinkDumper: running merger");
       JobClient.runJob(merger);
       LOG.info("LinkDumper: finished merger");
-    }
-    catch (IOException e) {
+    } catch (IOException e) {
       LOG.error(StringUtils.stringifyException(e));
       throw e;
     }
 
     fs.delete(tempInverted, true);
     long end = System.currentTimeMillis();
-    LOG.info("LinkDumper: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("LinkDumper: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
-  public static void main(String[] args)
-    throws Exception {
+  public static void main(String[] args) throws Exception {
     int res = ToolRunner.run(NutchConfiguration.create(), new LinkDumper(),
-      args);
+        args);
     System.exit(res);
   }
 
   /**
-   * Runs the LinkDumper tool.  This simply creates the database, to read the
+   * Runs the LinkDumper tool. This simply creates the database; to read the
    * values the nested Reader tool must be used.
    */
-  public int run(String[] args)
-    throws Exception {
+  public int run(String[] args) throws Exception {
 
     Options options = new Options();
     OptionBuilder.withArgName("help");
     OptionBuilder.withDescription("show this help message");
     Option helpOpts = OptionBuilder.create("help");
     options.addOption(helpOpts);
-    
+
     OptionBuilder.withArgName("webgraphdb");
     OptionBuilder.hasArg();
     OptionBuilder.withDescription("the web graph database to use");
     Option webGraphDbOpts = OptionBuilder.create("webgraphdb");
     options.addOption(webGraphDbOpts);
-    
+
     CommandLineParser parser = new GnuParser();
     try {
 
@@ -457,8 +441,7 @@ public class LinkDumper
       String webGraphDb = line.getOptionValue("webgraphdb");
       dumpLinks(new Path(webGraphDb));
       return 0;
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       LOG.error("LinkDumper: " + StringUtils.stringifyException(e));
       return -2;
     }

Modified: nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/LinkRank.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/LinkRank.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/LinkRank.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/LinkRank.java Thu Jan 29 05:38:59 2015
@@ -68,9 +68,7 @@ import org.apache.nutch.util.NutchJob;
 import org.apache.nutch.util.TimingUtil;
 import org.apache.nutch.util.URLUtil;
 
-public class LinkRank
-  extends Configured
-  implements Tool {
+public class LinkRank extends Configured implements Tool {
 
   public static final Logger LOG = LoggerFactory.getLogger(LinkRank.class);
   private static final String NUM_NODES = "_num_nodes_";
@@ -79,14 +77,16 @@ public class LinkRank
    * Runs the counter job. The counter job determines the number of links in the
    * webgraph. This is used during analysis.
    * 
-   * @param fs The job file system.
-   * @param webGraphDb The web graph database to use.
+   * @param fs
+   *          The job file system.
+   * @param webGraphDb
+   *          The web graph database to use.
    * 
    * @return The number of nodes in the web graph.
-   * @throws IOException If an error occurs while running the counter job.
+   * @throws IOException
+   *           If an error occurs while running the counter job.
    */
-  private int runCounter(FileSystem fs, Path webGraphDb)
-    throws IOException {
+  private int runCounter(FileSystem fs, Path webGraphDb) throws IOException {
 
     // configure the counter job
     Path numLinksPath = new Path(webGraphDb, NUM_NODES);
@@ -105,14 +105,14 @@ public class LinkRank
     counter.setOutputValueClass(LongWritable.class);
     counter.setNumReduceTasks(1);
     counter.setOutputFormat(TextOutputFormat.class);
-    counter.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);
+    counter.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs",
+        false);
 
     // run the counter job, outputs to a single reduce task and file
     LOG.info("Starting link counter job");
     try {
       JobClient.runJob(counter);
-    }
-    catch (IOException e) {
+    } catch (IOException e) {
       LOG.error(StringUtils.stringifyException(e));
       throw e;
     }
@@ -125,13 +125,13 @@ public class LinkRank
     BufferedReader buffer = new BufferedReader(new InputStreamReader(readLinks));
     String numLinksLine = buffer.readLine();
     readLinks.close();
-    
+
     // check if there are links to process, if none, webgraph might be empty
     if (numLinksLine == null || numLinksLine.length() == 0) {
       fs.delete(numLinksPath, true);
       throw new IOException("No links to process, is the webgraph empty?");
     }
-    
+
     // delete temp file and convert and return the number of links as an int
     LOG.info("Deleting numlinks temp file");
     fs.delete(numLinksPath, true);
@@ -143,13 +143,15 @@ public class LinkRank
    * Runs the initializer job. The initializer job sets up the nodes with a
    * default starting score for link analysis.
    * 
-   * @param nodeDb The node database to use.
-   * @param output The job output directory.
+   * @param nodeDb
+   *          The node database to use.
+   * @param output
+   *          The job output directory.
    * 
-   * @throws IOException If an error occurs while running the initializer job.
+   * @throws IOException
+   *           If an error occurs while running the initializer job.
    */
-  private void runInitializer(Path nodeDb, Path output)
-    throws IOException {
+  private void runInitializer(Path nodeDb, Path output) throws IOException {
 
     // configure the initializer
     JobConf initializer = new NutchJob(getConf());
@@ -163,14 +165,14 @@ public class LinkRank
     initializer.setOutputKeyClass(Text.class);
     initializer.setOutputValueClass(Node.class);
     initializer.setOutputFormat(MapFileOutputFormat.class);
-    initializer.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);
+    initializer.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs",
+        false);
 
     // run the initializer
     LOG.info("Starting initialization job");
     try {
       JobClient.runJob(initializer);
-    }
-    catch (IOException e) {
+    } catch (IOException e) {
       LOG.error(StringUtils.stringifyException(e));
       throw e;
     }
@@ -186,15 +188,20 @@ public class LinkRank
    * space requirements but it can be very useful is weeding out and eliminating
    * link farms and other spam pages.
    * 
-   * @param nodeDb The node database to use.
-   * @param outlinkDb The outlink database to use.
-   * @param loopDb The loop database to use if it exists.
-   * @param output The output directory.
+   * @param nodeDb
+   *          The node database to use.
+   * @param outlinkDb
+   *          The outlink database to use.
+   * @param loopDb
+   *          The loop database to use if it exists.
+   * @param output
+   *          The output directory.
    * 
-   * @throws IOException If an error occurs while running the inverter job.
+   * @throws IOException
+   *           If an error occurs while running the inverter job.
    */
   private void runInverter(Path nodeDb, Path outlinkDb, Path loopDb, Path output)
-    throws IOException {
+      throws IOException {
 
     // configure the inverter
     JobConf inverter = new NutchJob(getConf());
@@ -215,14 +222,14 @@ public class LinkRank
     inverter.setOutputKeyClass(Text.class);
     inverter.setOutputValueClass(LinkDatum.class);
     inverter.setOutputFormat(SequenceFileOutputFormat.class);
-    inverter.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);
+    inverter.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs",
+        false);
 
     // run the inverter job
     LOG.info("Starting inverter job");
     try {
       JobClient.runJob(inverter);
-    }
-    catch (IOException e) {
+    } catch (IOException e) {
       LOG.error(StringUtils.stringifyException(e));
       throw e;
     }
@@ -236,23 +243,28 @@ public class LinkRank
    * Typically the link analysis job is run a number of times to allow the link
    * rank scores to converge.
    * 
-   * @param nodeDb The node database from which we are getting previous link
-   * rank scores.
-   * @param inverted The inverted inlinks
-   * @param output The link analysis output.
-   * @param iteration The current iteration number.
-   * @param numIterations The total number of link analysis iterations
+   * @param nodeDb
+   *          The node database from which we are getting previous link rank
+   *          scores.
+   * @param inverted
+   *          The inverted inlinks
+   * @param output
+   *          The link analysis output.
+   * @param iteration
+   *          The current iteration number.
+   * @param numIterations
+   *          The total number of link analysis iterations
    * 
-   * @throws IOException If an error occurs during link analysis.
+   * @throws IOException
+   *           If an error occurs during link analysis.
    */
   private void runAnalysis(Path nodeDb, Path inverted, Path output,
-    int iteration, int numIterations, float rankOne)
-    throws IOException {
+      int iteration, int numIterations, float rankOne) throws IOException {
 
     JobConf analyzer = new NutchJob(getConf());
     analyzer.set("link.analyze.iteration", String.valueOf(iteration + 1));
     analyzer.setJobName("LinkAnalysis Analyzer, iteration " + (iteration + 1)
-      + " of " + numIterations);
+        + " of " + numIterations);
     FileInputFormat.addInputPath(analyzer, nodeDb);
     FileInputFormat.addInputPath(analyzer, inverted);
     FileOutputFormat.setOutputPath(analyzer, output);
@@ -265,13 +277,13 @@ public class LinkRank
     analyzer.setOutputKeyClass(Text.class);
     analyzer.setOutputValueClass(Node.class);
     analyzer.setOutputFormat(MapFileOutputFormat.class);
-    analyzer.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);
+    analyzer.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs",
+        false);
 
     LOG.info("Starting analysis job");
     try {
       JobClient.runJob(analyzer);
-    }
-    catch (IOException e) {
+    } catch (IOException e) {
       LOG.error(StringUtils.stringifyException(e));
       throw e;
     }
@@ -283,9 +295,9 @@ public class LinkRank
    * This is used to determine a rank one score for pages with zero inlinks but
    * that contain outlinks.
    */
-  private static class Counter
-    implements Mapper<Text, Node, Text, LongWritable>,
-    Reducer<Text, LongWritable, Text, LongWritable> {
+  private static class Counter implements
+      Mapper<Text, Node, Text, LongWritable>,
+      Reducer<Text, LongWritable, Text, LongWritable> {
 
     private static Text numNodes = new Text(NUM_NODES);
     private static LongWritable one = new LongWritable(1L);
@@ -297,8 +309,8 @@ public class LinkRank
      * Outputs one for every node.
      */
     public void map(Text key, Node value,
-      OutputCollector<Text, LongWritable> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, LongWritable> output, Reporter reporter)
+        throws IOException {
       output.collect(numNodes, one);
     }
 
@@ -306,8 +318,8 @@ public class LinkRank
      * Totals the node number and outputs a single total value.
      */
     public void reduce(Text key, Iterator<LongWritable> values,
-      OutputCollector<Text, LongWritable> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, LongWritable> output, Reporter reporter)
+        throws IOException {
 
       long total = 0;
       while (values.hasNext()) {
@@ -320,8 +332,7 @@ public class LinkRank
     }
   }
 
-  private static class Initializer
-    implements Mapper<Text, Node, Text, Node> {
+  private static class Initializer implements Mapper<Text, Node, Text, Node> {
 
     private JobConf conf;
     private float initialScore = 1.0f;
@@ -332,8 +343,7 @@ public class LinkRank
     }
 
     public void map(Text key, Node node, OutputCollector<Text, Node> output,
-      Reporter reporter)
-      throws IOException {
+        Reporter reporter) throws IOException {
 
       String url = key.toString();
       Node outNode = WritableUtils.clone(node, conf);
@@ -351,9 +361,9 @@ public class LinkRank
    * WebGraph. The link analysis process consists of inverting, analyzing and
    * scoring, in a loop for a given number of iterations.
    */
-  private static class Inverter
-    implements Mapper<Text, Writable, Text, ObjectWritable>,
-    Reducer<Text, ObjectWritable, Text, LinkDatum> {
+  private static class Inverter implements
+      Mapper<Text, Writable, Text, ObjectWritable>,
+      Reducer<Text, ObjectWritable, Text, LinkDatum> {
 
     private JobConf conf;
 
@@ -365,8 +375,8 @@ public class LinkRank
      * Convert values to ObjectWritable
      */
     public void map(Text key, Writable value,
-      OutputCollector<Text, ObjectWritable> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, ObjectWritable> output, Reporter reporter)
+        throws IOException {
 
       ObjectWritable objWrite = new ObjectWritable();
       objWrite.set(value);
@@ -379,8 +389,8 @@ public class LinkRank
      * within the loopset.
      */
     public void reduce(Text key, Iterator<ObjectWritable> values,
-      OutputCollector<Text, LinkDatum> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, LinkDatum> output, Reporter reporter)
+        throws IOException {
 
       String fromUrl = key.toString();
       List<LinkDatum> outlinks = new ArrayList<LinkDatum>();
@@ -392,23 +402,25 @@ public class LinkRank
         ObjectWritable write = values.next();
         Object obj = write.get();
         if (obj instanceof Node) {
-          node = (Node)obj;
-        }
-        else if (obj instanceof LinkDatum) {
-          outlinks.add(WritableUtils.clone((LinkDatum)obj, conf));
-        }
-        else if (obj instanceof LoopSet) {
-          loops = (LoopSet)obj;
+          node = (Node) obj;
+        } else if (obj instanceof LinkDatum) {
+          outlinks.add(WritableUtils.clone((LinkDatum) obj, conf));
+        } else if (obj instanceof LoopSet) {
+          loops = (LoopSet) obj;
         }
       }
 
-      // Check for the possibility of a LoopSet object without Node and LinkDatum objects. This can happen
-      // with webgraphs that receive deletes (e.g. link.delete.gone and/or URL filters or normalizers) but
+      // Check for the possibility of a LoopSet object without Node and
+      // LinkDatum objects. This can happen
+      // with webgraphs that receive deletes (e.g. link.delete.gone and/or URL
+      // filters or normalizers) but
       // without an updated Loops database.
       // See: https://issues.apache.org/jira/browse/NUTCH-1299
       if (node == null && loops != null) {
         // Nothing to do
-        LOG.warn("LoopSet without Node object received for " + key.toString() + " . You should either not use Loops as input of the LinkRank program or rerun the Loops program over the WebGraph.");
+        LOG.warn("LoopSet without Node object received for "
+            + key.toString()
+            + " . You should either not use Loops as input of the LinkRank program or rerun the Loops program over the WebGraph.");
         return;
       }
 
@@ -430,7 +442,7 @@ public class LinkRank
           // remove any url that is contained in the loopset
           if (loopSet != null && loopSet.contains(toUrl)) {
             LOG.debug(fromUrl + ": Skipping inverting inlink from loop "
-              + toUrl);
+                + toUrl);
             continue;
           }
           outlink.setUrl(fromUrl);
@@ -439,8 +451,8 @@ public class LinkRank
           // collect the inverted outlink
           output.collect(new Text(toUrl), outlink);
           LOG.debug(toUrl + ": inverting inlink from " + fromUrl
-            + " origscore: " + inlinkScore + " numOutlinks: " + numOutlinks
-            + " inlinkscore: " + outlinkScore);
+              + " origscore: " + inlinkScore + " numOutlinks: " + numOutlinks
+              + " inlinkscore: " + outlinkScore);
         }
       }
     }
@@ -452,9 +464,9 @@ public class LinkRank
   /**
    * Runs a single link analysis iteration.
    */
-  private static class Analyzer
-    implements Mapper<Text, Writable, Text, ObjectWritable>,
-    Reducer<Text, ObjectWritable, Text, Node> {
+  private static class Analyzer implements
+      Mapper<Text, Writable, Text, ObjectWritable>,
+      Reducer<Text, ObjectWritable, Text, Node> {
 
     private JobConf conf;
     private float dampingFactor = 0.85f;
@@ -471,13 +483,13 @@ public class LinkRank
 
       try {
         this.conf = conf;
-        this.dampingFactor = conf.getFloat("link.analyze.damping.factor", 0.85f);
+        this.dampingFactor = conf
+            .getFloat("link.analyze.damping.factor", 0.85f);
         this.rankOne = conf.getFloat("link.analyze.rank.one", 0.0f);
         this.itNum = conf.getInt("link.analyze.iteration", 0);
         limitPages = conf.getBoolean("link.ignore.limit.page", true);
         limitDomains = conf.getBoolean("link.ignore.limit.domain", true);
-      }
-      catch (Exception e) {
+      } catch (Exception e) {
         LOG.error(StringUtils.stringifyException(e));
         throw new IllegalArgumentException(e);
       }
@@ -487,8 +499,8 @@ public class LinkRank
      * Convert values to ObjectWritable
      */
     public void map(Text key, Writable value,
-      OutputCollector<Text, ObjectWritable> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, ObjectWritable> output, Reporter reporter)
+        throws IOException {
 
       ObjectWritable objWrite = new ObjectWritable();
       objWrite.set(WritableUtils.clone(value, conf));
@@ -500,8 +512,8 @@ public class LinkRank
      * stored in a temporary NodeDb which replaces the NodeDb of the WebGraph.
      */
     public void reduce(Text key, Iterator<ObjectWritable> values,
-      OutputCollector<Text, Node> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, Node> output, Reporter reporter)
+        throws IOException {
 
       String url = key.toString();
       Set<String> domains = new HashSet<String>();
@@ -517,11 +529,10 @@ public class LinkRank
         ObjectWritable next = values.next();
         Object value = next.get();
         if (value instanceof Node) {
-          node = (Node)value;
-        }
-        else if (value instanceof LinkDatum) {
+          node = (Node) value;
+        } else if (value instanceof LinkDatum) {
 
-          LinkDatum linkDatum = (LinkDatum)value;
+          LinkDatum linkDatum = (LinkDatum) value;
           float scoreFromInlink = linkDatum.getScore();
           String inlinkUrl = linkDatum.getUrl();
           String inLinkDomain = URLUtil.getDomainName(inlinkUrl);
@@ -529,9 +540,9 @@ public class LinkRank
 
           // limit counting duplicate inlinks by pages or domains
           if ((limitPages && pages.contains(inLinkPage))
-            || (limitDomains && domains.contains(inLinkDomain))) {
+              || (limitDomains && domains.contains(inLinkDomain))) {
             LOG.debug(url + ": ignoring " + scoreFromInlink + " from "
-              + inlinkUrl + ", duplicate page or domain");
+                + inlinkUrl + ", duplicate page or domain");
             continue;
           }
 
@@ -541,16 +552,16 @@ public class LinkRank
           domains.add(inLinkDomain);
           pages.add(inLinkPage);
           LOG.debug(url + ": adding " + scoreFromInlink + " from " + inlinkUrl
-            + ", total: " + totalInlinkScore);
+              + ", total: " + totalInlinkScore);
         }
       }
 
       // calculate linkRank score formula
       float linkRankScore = (1 - this.dampingFactor)
-        + (this.dampingFactor * totalInlinkScore);
+          + (this.dampingFactor * totalInlinkScore);
 
       LOG.debug(url + ": score: " + linkRankScore + " num inlinks: "
-        + numInlinks + " iteration: " + itNum);
+          + numInlinks + " iteration: " + itNum);
 
       // store the score in a temporary NodeDb
       Node outNode = WritableUtils.clone(node, conf);
@@ -558,8 +569,7 @@ public class LinkRank
       output.collect(key, outNode);
     }
 
-    public void close()
-      throws IOException {
+    public void close() throws IOException {
     }
   }
 
@@ -586,12 +596,13 @@ public class LinkRank
    * by default 10. And finally replaces the NodeDb in the WebGraph with the
    * link rank output.
    * 
-   * @param webGraphDb The WebGraph to run link analysis on.
+   * @param webGraphDb
+   *          The WebGraph to run link analysis on.
    * 
-   * @throws IOException If an error occurs during link analysis.
+   * @throws IOException
+   *           If an error occurs during link analysis.
    */
-  public void analyze(Path webGraphDb)
-    throws IOException {
+  public void analyze(Path webGraphDb) throws IOException {
 
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
@@ -621,7 +632,7 @@ public class LinkRank
     // initialze all urls with a default score
     int numLinks = runCounter(fs, webGraphDb);
     runInitializer(wgNodeDb, nodeDb);
-    float rankOneScore = (1f / (float)numLinks);
+    float rankOneScore = (1f / (float) numLinks);
 
     if (LOG.isInfoEnabled()) {
       LOG.info("Analysis: Number of links: " + numLinks);
@@ -634,9 +645,10 @@ public class LinkRank
     for (int i = 0; i < numIterations; i++) {
 
       // the input to inverting is always the previous output from analysis
-      LOG.info("Analysis: Starting iteration " + (i + 1) + " of " + numIterations);
+      LOG.info("Analysis: Starting iteration " + (i + 1) + " of "
+          + numIterations);
       Path tempRank = new Path(linkRank + "-"
-        + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
+          + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
       fs.mkdirs(tempRank);
       Path tempInverted = new Path(tempRank, "inverted");
       Path tempNodeDb = new Path(tempRank, WebGraph.NODE_DIR);
@@ -644,13 +656,13 @@ public class LinkRank
       // run invert and analysis
       runInverter(nodeDb, wgOutlinkDb, loopDb, tempInverted);
       runAnalysis(nodeDb, tempInverted, tempNodeDb, i, numIterations,
-        rankOneScore);
+          rankOneScore);
 
       // replace the temporary NodeDb with the output from analysis
       LOG.info("Analysis: Installing new link scores");
       FSUtils.replace(fs, linkRank, tempRank, true);
       LOG.info("Analysis: finished iteration " + (i + 1) + " of "
-        + numIterations);
+          + numIterations);
     }
 
     // replace the NodeDb in the WebGraph with the final output of analysis
@@ -660,11 +672,11 @@ public class LinkRank
     // remove the temporary link rank folder
     fs.delete(linkRank, true);
     long end = System.currentTimeMillis();
-    LOG.info("Analysis: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("Analysis: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
-  public static void main(String[] args)
-    throws Exception {
+  public static void main(String[] args) throws Exception {
     int res = ToolRunner.run(NutchConfiguration.create(), new LinkRank(), args);
     System.exit(res);
   }
@@ -672,15 +684,14 @@ public class LinkRank
   /**
    * Runs the LinkRank tool.
    */
-  public int run(String[] args)
-    throws Exception {
+  public int run(String[] args) throws Exception {
 
     Options options = new Options();
     OptionBuilder.withArgName("help");
     OptionBuilder.withDescription("show this help message");
     Option helpOpts = OptionBuilder.create("help");
     options.addOption(helpOpts);
-    
+
     OptionBuilder.withArgName("webgraphdb");
     OptionBuilder.hasArg();
     OptionBuilder.withDescription("the web graph db to use");
@@ -701,8 +712,7 @@ public class LinkRank
 
       analyze(new Path(webGraphDb));
       return 0;
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       LOG.error("LinkAnalysis: " + StringUtils.stringifyException(e));
       return -2;
     }

Modified: nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/LoopReader.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/LoopReader.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/LoopReader.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/LoopReader.java Thu Jan 29 05:38:59 2015
@@ -44,35 +44,38 @@ public class LoopReader extends Configur
 
   private FileSystem fs;
   private MapFile.Reader[] loopReaders;
-  
-  public LoopReader() { }
-  
+
+  public LoopReader() {
+  }
+
   public LoopReader(Configuration conf) {
     super(conf);
   }
 
   /**
-   * Prints loopset for a single url.  The loopset information will show any
+   * Prints loopset for a single url. The loopset information will show any
    * outlink url the eventually forms a link cycle.
    * 
-   * @param webGraphDb The WebGraph to check for loops
-   * @param url The url to check.
+   * @param webGraphDb
+   *          The WebGraph to check for loops
+   * @param url
+   *          The url to check.
    * 
-   * @throws IOException If an error occurs while printing loopset information.
+   * @throws IOException
+   *           If an error occurs while printing loopset information.
    */
-  public void dumpUrl(Path webGraphDb, String url)
-    throws IOException {
+  public void dumpUrl(Path webGraphDb, String url) throws IOException {
 
     // open the readers
     fs = FileSystem.get(getConf());
     loopReaders = MapFileOutputFormat.getReaders(fs, new Path(webGraphDb,
-      Loops.LOOPS_DIR), getConf());
+        Loops.LOOPS_DIR), getConf());
 
     // get the loopset for a given url, if any
     Text key = new Text(url);
     LoopSet loop = new LoopSet();
     MapFileOutputFormat.getEntry(loopReaders,
-      new HashPartitioner<Text, LoopSet>(), key, loop);
+        new HashPartitioner<Text, LoopSet>(), key, loop);
 
     // print out each loop url in the set
     System.out.println(url + ":");
@@ -85,24 +88,23 @@ public class LoopReader extends Configur
   }
 
   /**
-   * Runs the LoopReader tool.  For this tool to work the loops job must have
+   * Runs the LoopReader tool. For this tool to work the loops job must have
    * already been run on the corresponding WebGraph.
    */
-  public static void main(String[] args)
-    throws Exception {
+  public static void main(String[] args) throws Exception {
 
     Options options = new Options();
     OptionBuilder.withArgName("help");
     OptionBuilder.withDescription("show this help message");
     Option helpOpts = OptionBuilder.create("help");
     options.addOption(helpOpts);
-    
+
     OptionBuilder.withArgName("webgraphdb");
     OptionBuilder.hasArg();
     OptionBuilder.withDescription("the webgraphdb to use");
     Option webGraphOpts = OptionBuilder.create("webgraphdb");
     options.addOption(webGraphOpts);
-    
+
     OptionBuilder.withArgName("url");
     OptionBuilder.hasOptionalArg();
     OptionBuilder.withDescription("the url to dump");
@@ -114,7 +116,7 @@ public class LoopReader extends Configur
 
       CommandLine line = parser.parse(options, args);
       if (line.hasOption("help") || !line.hasOption("webgraphdb")
-        || !line.hasOption("url")) {
+          || !line.hasOption("url")) {
         HelpFormatter formatter = new HelpFormatter();
         formatter.printHelp("WebGraphReader", options);
         return;
@@ -125,8 +127,7 @@ public class LoopReader extends Configur
       LoopReader reader = new LoopReader(NutchConfiguration.create());
       reader.dumpUrl(new Path(webGraphDb), url);
       return;
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       e.printStackTrace();
       return;
     }

Modified: nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/Loops.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/Loops.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/Loops.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/Loops.java Thu Jan 29 05:38:59 2015
@@ -76,9 +76,7 @@ import org.apache.nutch.util.TimingUtil;
  * rather small. Because of this the Loops job is optional and if it doesn't
  * exist then it won't be factored into the LinkRank program.
  */
-public class Loops
-  extends Configured
-  implements Tool {
+public class Loops extends Configured implements Tool {
 
   public static final Logger LOG = LoggerFactory.getLogger(Loops.class);
   public static final String LOOPS_DIR = "loops";
@@ -87,8 +85,7 @@ public class Loops
   /**
    * A link path or route looking to identify a link cycle.
    */
-  public static class Route
-    implements Writable {
+  public static class Route implements Writable {
 
     private String outlinkUrl = null;
     private String lookingFor = null;
@@ -122,16 +119,14 @@ public class Loops
       this.found = found;
     }
 
-    public void readFields(DataInput in)
-      throws IOException {
+    public void readFields(DataInput in) throws IOException {
 
       outlinkUrl = Text.readString(in);
       lookingFor = Text.readString(in);
       found = in.readBoolean();
     }
 
-    public void write(DataOutput out)
-      throws IOException {
+    public void write(DataOutput out) throws IOException {
       Text.writeString(out, outlinkUrl);
       Text.writeString(out, lookingFor);
       out.writeBoolean(found);
@@ -141,8 +136,7 @@ public class Loops
   /**
    * A set of loops.
    */
-  public static class LoopSet
-    implements Writable {
+  public static class LoopSet implements Writable {
 
     private Set<String> loopSet = new HashSet<String>();
 
@@ -158,8 +152,7 @@ public class Loops
       this.loopSet = loopSet;
     }
 
-    public void readFields(DataInput in)
-      throws IOException {
+    public void readFields(DataInput in) throws IOException {
 
       int numNodes = in.readInt();
       loopSet = new HashSet<String>();
@@ -169,8 +162,7 @@ public class Loops
       }
     }
 
-    public void write(DataOutput out)
-      throws IOException {
+    public void write(DataOutput out) throws IOException {
 
       int numNodes = (loopSet != null ? loopSet.size() : 0);
       out.writeInt(numNodes);
@@ -191,10 +183,9 @@ public class Loops
   /**
    * Initializes the Loop routes.
    */
-  public static class Initializer
-    extends Configured
-    implements Mapper<Text, Writable, Text, ObjectWritable>,
-    Reducer<Text, ObjectWritable, Text, Route> {
+  public static class Initializer extends Configured implements
+      Mapper<Text, Writable, Text, ObjectWritable>,
+      Reducer<Text, ObjectWritable, Text, Route> {
 
     private JobConf conf;
 
@@ -222,8 +213,8 @@ public class Loops
      * Wraps values in ObjectWritable.
      */
     public void map(Text key, Writable value,
-      OutputCollector<Text, ObjectWritable> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, ObjectWritable> output, Reporter reporter)
+        throws IOException {
 
       ObjectWritable objWrite = new ObjectWritable();
       objWrite.set(value);
@@ -236,8 +227,8 @@ public class Loops
      * the Looper job.
      */
     public void reduce(Text key, Iterator<ObjectWritable> values,
-      OutputCollector<Text, Route> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, Route> output, Reporter reporter)
+        throws IOException {
 
       String url = key.toString();
       Node node = null;
@@ -248,10 +239,9 @@ public class Loops
         ObjectWritable objWrite = values.next();
         Object obj = objWrite.get();
         if (obj instanceof LinkDatum) {
-          outlinkList.add((LinkDatum)obj);
-        }
-        else if (obj instanceof Node) {
-          node = (Node)obj;
+          outlinkList.add((LinkDatum) obj);
+        } else if (obj instanceof Node) {
+          node = (Node) obj;
         }
       }
 
@@ -282,10 +272,9 @@ public class Loops
    * Follows a route path looking for the start url of the route. If the start
    * url is found then the route is a cyclical path.
    */
-  public static class Looper
-    extends Configured
-    implements Mapper<Text, Writable, Text, ObjectWritable>,
-    Reducer<Text, ObjectWritable, Text, Route> {
+  public static class Looper extends Configured implements
+      Mapper<Text, Writable, Text, ObjectWritable>,
+      Reducer<Text, ObjectWritable, Text, Route> {
 
     private JobConf conf;
     private boolean last = false;
@@ -315,15 +304,14 @@ public class Loops
      * Wrap values in ObjectWritable.
      */
     public void map(Text key, Writable value,
-      OutputCollector<Text, ObjectWritable> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, ObjectWritable> output, Reporter reporter)
+        throws IOException {
 
       ObjectWritable objWrite = new ObjectWritable();
       Writable cloned = null;
       if (value instanceof LinkDatum) {
-        cloned = new Text(((LinkDatum)value).getUrl());
-      }
-      else {
+        cloned = new Text(((LinkDatum) value).getUrl());
+      } else {
         cloned = WritableUtils.clone(value, conf);
       }
       objWrite.set(cloned);
@@ -336,8 +324,8 @@ public class Loops
      * passes.
      */
     public void reduce(Text key, Iterator<ObjectWritable> values,
-      OutputCollector<Text, Route> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, Route> output, Reporter reporter)
+        throws IOException {
 
       List<Route> routeList = new ArrayList<Route>();
       Set<String> outlinkUrls = new LinkedHashSet<String>();
@@ -348,10 +336,9 @@ public class Loops
         ObjectWritable next = values.next();
         Object value = next.get();
         if (value instanceof Route) {
-          routeList.add(WritableUtils.clone((Route)value, conf));
-        }
-        else if (value instanceof Text) {
-          String outlinkUrl = ((Text)value).toString();
+          routeList.add(WritableUtils.clone((Route) value, conf));
+        } else if (value instanceof Text) {
+          String outlinkUrl = ((Text) value).toString();
           if (!outlinkUrls.contains(outlinkUrl)) {
             outlinkUrls.add(outlinkUrl);
           }
@@ -375,16 +362,14 @@ public class Loops
         routeIt.remove();
         if (route.isFound()) {
           output.collect(key, route);
-        }
-        else {
+        } else {
 
           // if the route start url is found, set route to found and collect
           String lookingFor = route.getLookingFor();
           if (outlinkUrls.contains(lookingFor)) {
             route.setFound(true);
             output.collect(key, route);
-          }
-          else if (!last) {
+          } else if (!last) {
 
             // setup for next pass through the loop
             for (String outlink : outlinkUrls) {
@@ -402,10 +387,8 @@ public class Loops
   /**
    * Finishes the Loops job by aggregating and collecting and found routes.
    */
-  public static class Finalizer
-    extends Configured
-    implements Mapper<Text, Route, Text, Route>,
-    Reducer<Text, Route, Text, LoopSet> {
+  public static class Finalizer extends Configured implements
+      Mapper<Text, Route, Text, Route>, Reducer<Text, Route, Text, LoopSet> {
 
     private JobConf conf;
 
@@ -433,8 +416,7 @@ public class Loops
      * Maps out and found routes, those will be the link cycles.
      */
     public void map(Text key, Route value, OutputCollector<Text, Route> output,
-      Reporter reporter)
-      throws IOException {
+        Reporter reporter) throws IOException {
 
       if (value.isFound()) {
         String lookingFor = value.getLookingFor();
@@ -443,12 +425,12 @@ public class Loops
     }
 
     /**
-     * Aggregates all found routes for a given start url into a loopset and 
+     * Aggregates all found routes for a given start url into a loopset and
      * collects the loopset.
      */
     public void reduce(Text key, Iterator<Route> values,
-      OutputCollector<Text, LoopSet> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, LoopSet> output, Reporter reporter)
+        throws IOException {
 
       LoopSet loops = new LoopSet();
       while (values.hasNext()) {
@@ -465,8 +447,7 @@ public class Loops
   /**
    * Runs the various loop jobs.
    */
-  public void findLoops(Path webGraphDb)
-    throws IOException {
+  public void findLoops(Path webGraphDb) throws IOException {
 
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
@@ -481,7 +462,7 @@ public class Loops
     Path nodeDb = new Path(webGraphDb, WebGraph.NODE_DIR);
     Path routes = new Path(webGraphDb, ROUTES_DIR);
     Path tempRoute = new Path(webGraphDb, ROUTES_DIR + "-"
-      + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
+        + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
 
     // run the initializer
     JobConf init = new NutchJob(conf);
@@ -504,8 +485,7 @@ public class Loops
       LOG.info("Loops: installing initializer " + routes);
       FSUtils.replace(fs, routes, tempRoute, true);
       LOG.info("Loops: finished initializer");
-    }
-    catch (IOException e) {
+    } catch (IOException e) {
       LOG.error(StringUtils.stringifyException(e));
       throw e;
     }
@@ -536,8 +516,7 @@ public class Loops
         LOG.info("Loops: installing looper " + routes);
         FSUtils.replace(fs, routes, tempRoute, true);
         LOG.info("Loops: finished looper");
-      }
-      catch (IOException e) {
+      } catch (IOException e) {
         LOG.error(StringUtils.stringifyException(e));
         throw e;
       }
@@ -561,17 +540,16 @@ public class Loops
       LOG.info("Loops: starting finalizer");
       JobClient.runJob(finalizer);
       LOG.info("Loops: finished finalizer");
-    }
-    catch (IOException e) {
+    } catch (IOException e) {
       LOG.error(StringUtils.stringifyException(e));
       throw e;
     }
     long end = System.currentTimeMillis();
-    LOG.info("Loops: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("Loops: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
-  public static void main(String[] args)
-    throws Exception {
+  public static void main(String[] args) throws Exception {
     int res = ToolRunner.run(NutchConfiguration.create(), new Loops(), args);
     System.exit(res);
   }
@@ -579,15 +557,14 @@ public class Loops
   /**
    * Runs the Loops tool.
    */
-  public int run(String[] args)
-    throws Exception {
+  public int run(String[] args) throws Exception {
 
     Options options = new Options();
     OptionBuilder.withArgName("help");
     OptionBuilder.withDescription("show this help message");
     Option helpOpts = OptionBuilder.create("help");
     options.addOption(helpOpts);
-    
+
     OptionBuilder.withArgName("webgraphdb");
     OptionBuilder.hasArg();
     OptionBuilder.withDescription("the web graph database to use");
@@ -607,8 +584,7 @@ public class Loops
       String webGraphDb = line.getOptionValue("webgraphdb");
       findLoops(new Path(webGraphDb));
       return 0;
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       LOG.error("Loops: " + StringUtils.stringifyException(e));
       return -2;
     }

Modified: nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/Node.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/Node.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/Node.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/Node.java Thu Jan 29 05:38:59 2015
@@ -25,12 +25,11 @@ import org.apache.nutch.metadata.Metadat
 
 /**
  * A class which holds the number of inlinks and outlinks for a given url along
- * with an inlink score from a link analysis program and any metadata.  
+ * with an inlink score from a link analysis program and any metadata.
  * 
  * The Node is the core unit of the NodeDb in the WebGraph.
  */
-public class Node
-  implements Writable {
+public class Node implements Writable {
 
   private int numInlinks = 0;
   private int numOutlinks = 0;
@@ -77,8 +76,7 @@ public class Node
     this.metadata = metadata;
   }
 
-  public void readFields(DataInput in)
-    throws IOException {
+  public void readFields(DataInput in) throws IOException {
 
     numInlinks = in.readInt();
     numOutlinks = in.readInt();
@@ -87,8 +85,7 @@ public class Node
     metadata.readFields(in);
   }
 
-  public void write(DataOutput out)
-    throws IOException {
+  public void write(DataOutput out) throws IOException {
 
     out.writeInt(numInlinks);
     out.writeInt(numOutlinks);
@@ -98,8 +95,8 @@ public class Node
 
   public String toString() {
     return "num inlinks: " + numInlinks + ", num outlinks: " + numOutlinks
-      + ", inlink score: " + inlinkScore + ", outlink score: "
-      + getOutlinkScore() + ", metadata: " + metadata.toString();
+        + ", inlink score: " + inlinkScore + ", outlink score: "
+        + getOutlinkScore() + ", metadata: " + metadata.toString();
   }
 
 }

Modified: nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java Thu Jan 29 05:38:59 2015
@@ -63,26 +63,20 @@ import org.apache.nutch.util.URLUtil;
  * have been run. For link analysis score a program such as LinkRank will need
  * to have been run which updates the NodeDb of the WebGraph.
  */
-public class NodeDumper
-  extends Configured
-  implements Tool {
+public class NodeDumper extends Configured implements Tool {
 
   public static final Logger LOG = LoggerFactory.getLogger(NodeDumper.class);
 
   private static enum DumpType {
-    INLINKS,
-    OUTLINKS,
-    SCORES
+    INLINKS, OUTLINKS, SCORES
   }
 
   private static enum AggrType {
-    SUM,
-    MAX
+    SUM, MAX
   }
 
   private static enum NameType {
-    HOST,
-    DOMAIN
+    HOST, DOMAIN
   }
 
   /**
@@ -90,10 +84,9 @@ public class NodeDumper
    * on the command line, the top urls could be for number of inlinks, for
    * number of outlinks, or for link analysis score.
    */
-  public static class Sorter
-    extends Configured
-    implements Mapper<Text, Node, FloatWritable, Text>,
-    Reducer<FloatWritable, Text, Text, FloatWritable> {
+  public static class Sorter extends Configured implements
+      Mapper<Text, Node, FloatWritable, Text>,
+      Reducer<FloatWritable, Text, Text, FloatWritable> {
 
     private JobConf conf;
     private boolean inlinks = false;
@@ -121,17 +114,15 @@ public class NodeDumper
      * score.
      */
     public void map(Text key, Node node,
-      OutputCollector<FloatWritable, Text> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<FloatWritable, Text> output, Reporter reporter)
+        throws IOException {
 
       float number = 0;
       if (inlinks) {
         number = node.getNumInlinks();
-      }
-      else if (outlinks) {
+      } else if (outlinks) {
         number = node.getNumOutlinks();
-      }
-      else {
+      } else {
         number = node.getInlinkScore();
       }
 
@@ -143,8 +134,8 @@ public class NodeDumper
      * Flips and collects the url and numeric sort value.
      */
     public void reduce(FloatWritable key, Iterator<Text> values,
-      OutputCollector<Text, FloatWritable> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, FloatWritable> output, Reporter reporter)
+        throws IOException {
 
       // take the negative of the negative to get original value, sometimes 0
       // value are a little weird
@@ -162,14 +153,13 @@ public class NodeDumper
   }
 
   /**
-   * Outputs the hosts or domains with an associated value. This value consists of either
-   * the number of inlinks, the number of outlinks or the score. The computed value is then
-   * either the sum of all parts or the top value.
+   * Outputs the hosts or domains with an associated value. This value consists
+   * of either the number of inlinks, the number of outlinks or the score. The
+   * computed value is then either the sum of all parts or the top value.
    */
-  public static class Dumper
-    extends Configured
-    implements Mapper<Text, Node, Text, FloatWritable>,
-    Reducer<Text, FloatWritable, Text, FloatWritable> {
+  public static class Dumper extends Configured implements
+      Mapper<Text, Node, Text, FloatWritable>,
+      Reducer<Text, FloatWritable, Text, FloatWritable> {
 
     private JobConf conf;
     private boolean inlinks = false;
@@ -197,21 +187,19 @@ public class NodeDumper
     }
 
     /**
-     * Outputs the host or domain as key for this record and numInlinks, numOutlinks
-     * or score as the value.
+     * Outputs the host or domain as key for this record and numInlinks,
+     * numOutlinks or score as the value.
      */
     public void map(Text key, Node node,
-      OutputCollector<Text, FloatWritable> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, FloatWritable> output, Reporter reporter)
+        throws IOException {
 
       float number = 0;
       if (inlinks) {
         number = node.getNumInlinks();
-      }
-      else if (outlinks) {
+      } else if (outlinks) {
         number = node.getNumOutlinks();
-      }
-      else {
+      } else {
         number = node.getInlinkScore();
       }
 
@@ -228,8 +216,8 @@ public class NodeDumper
      * Outputs either the sum or the top value for this record.
      */
     public void reduce(Text key, Iterator<FloatWritable> values,
-      OutputCollector<Text, FloatWritable> output, Reporter reporter)
-      throws IOException {
+        OutputCollector<Text, FloatWritable> output, Reporter reporter)
+        throws IOException {
 
       long numCollected = 0;
       float sumOrMax = 0;
@@ -256,16 +244,19 @@ public class NodeDumper
 
   /**
    * Runs the process to dump the top urls out to a text file.
-   *
-   * @param webGraphDb The WebGraph from which to pull values.
-   *
+   * 
+   * @param webGraphDb
+   *          The WebGraph from which to pull values.
+   * 
    * @param topN
    * @param output
-   *
-   * @throws IOException If an error occurs while dumping the top values.
+   * 
+   * @throws IOException
+   *           If an error occurs while dumping the top values.
    */
-  public void dumpNodes(Path webGraphDb, DumpType type, long topN, Path output, boolean asEff, NameType nameType, AggrType aggrType, boolean asSequenceFile)
-    throws Exception {
+  public void dumpNodes(Path webGraphDb, DumpType type, long topN, Path output,
+      boolean asEff, NameType nameType, AggrType aggrType,
+      boolean asSequenceFile) throws Exception {
 
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
@@ -320,77 +311,76 @@ public class NodeDumper
     try {
       LOG.info("NodeDumper: running");
       JobClient.runJob(dumper);
-    }
-    catch (IOException e) {
+    } catch (IOException e) {
       LOG.error(StringUtils.stringifyException(e));
       throw e;
     }
     long end = System.currentTimeMillis();
-    LOG.info("NodeDumper: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("NodeDumper: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
-  public static void main(String[] args)
-    throws Exception {
+  public static void main(String[] args) throws Exception {
     int res = ToolRunner.run(NutchConfiguration.create(), new NodeDumper(),
-      args);
+        args);
     System.exit(res);
   }
 
   /**
    * Runs the node dumper tool.
    */
-  public int run(String[] args)
-    throws Exception {
+  public int run(String[] args) throws Exception {
 
     Options options = new Options();
     OptionBuilder.withArgName("help");
     OptionBuilder.withDescription("show this help message");
     Option helpOpts = OptionBuilder.create("help");
     options.addOption(helpOpts);
-    
+
     OptionBuilder.withArgName("webgraphdb");
     OptionBuilder.hasArg();
     OptionBuilder.withDescription("the web graph database to use");
     Option webGraphDbOpts = OptionBuilder.create("webgraphdb");
     options.addOption(webGraphDbOpts);
-    
+
     OptionBuilder.withArgName("inlinks");
     OptionBuilder.withDescription("show highest inlinks");
     Option inlinkOpts = OptionBuilder.create("inlinks");
     options.addOption(inlinkOpts);
-    
+
     OptionBuilder.withArgName("outlinks");
     OptionBuilder.withDescription("show highest outlinks");
     Option outlinkOpts = OptionBuilder.create("outlinks");
     options.addOption(outlinkOpts);
-    
+
     OptionBuilder.withArgName("scores");
     OptionBuilder.withDescription("show highest scores");
     Option scoreOpts = OptionBuilder.create("scores");
     options.addOption(scoreOpts);
-    
+
     OptionBuilder.withArgName("topn");
     OptionBuilder.hasOptionalArg();
     OptionBuilder.withDescription("show topN scores");
     Option topNOpts = OptionBuilder.create("topn");
     options.addOption(topNOpts);
-    
+
     OptionBuilder.withArgName("output");
     OptionBuilder.hasArg();
     OptionBuilder.withDescription("the output directory to use");
     Option outputOpts = OptionBuilder.create("output");
     options.addOption(outputOpts);
-    
+
     OptionBuilder.withArgName("asEff");
-    OptionBuilder.withDescription("Solr ExternalFileField compatible output format");
+    OptionBuilder
+        .withDescription("Solr ExternalFileField compatible output format");
     Option effOpts = OptionBuilder.create("asEff");
     options.addOption(effOpts);
-    
+
     OptionBuilder.hasArgs(2);
     OptionBuilder.withDescription("group <host|domain> <sum|max>");
     Option groupOpts = OptionBuilder.create("group");
     options.addOption(groupOpts);
-    
+
     OptionBuilder.withArgName("asSequenceFile");
     OptionBuilder.withDescription("whether to output as a sequencefile");
     Option sequenceFileOpts = OptionBuilder.create("asSequenceFile");
@@ -410,32 +400,32 @@ public class NodeDumper
       boolean inlinks = line.hasOption("inlinks");
       boolean outlinks = line.hasOption("outlinks");
 
-      long topN = (line.hasOption("topn")
-        ? Long.parseLong(line.getOptionValue("topn")) : Long.MAX_VALUE);
+      long topN = (line.hasOption("topn") ? Long.parseLong(line
+          .getOptionValue("topn")) : Long.MAX_VALUE);
 
       // get the correct dump type
       String output = line.getOptionValue("output");
-      DumpType type = (inlinks ? DumpType.INLINKS : outlinks
-        ? DumpType.OUTLINKS : DumpType.SCORES);
+      DumpType type = (inlinks ? DumpType.INLINKS
+          : outlinks ? DumpType.OUTLINKS : DumpType.SCORES);
 
       NameType nameType = null;
       AggrType aggrType = null;
       String[] group = line.getOptionValues("group");
       if (group != null && group.length == 2) {
-        nameType = (group[0].equals("host") ? NameType.HOST : group[0].equals("domain")
-          ? NameType.DOMAIN : null);
-        aggrType = (group[1].equals("sum") ? AggrType.SUM : group[1].equals("sum")
-          ? AggrType.MAX : null);
+        nameType = (group[0].equals("host") ? NameType.HOST : group[0]
+            .equals("domain") ? NameType.DOMAIN : null);
+        aggrType = (group[1].equals("sum") ? AggrType.SUM
+            : group[1].equals("max") ? AggrType.MAX : null);
       }
 
       // Use ExternalFileField?
       boolean asEff = line.hasOption("asEff");
       boolean asSequenceFile = line.hasOption("asSequenceFile");
 
-      dumpNodes(new Path(webGraphDb), type, topN, new Path(output), asEff, nameType, aggrType, asSequenceFile);
+      dumpNodes(new Path(webGraphDb), type, topN, new Path(output), asEff,
+          nameType, aggrType, asSequenceFile);
       return 0;
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       LOG.error("NodeDumper: " + StringUtils.stringifyException(e));
       return -2;
     }

Modified: nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/NodeReader.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/NodeReader.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/NodeReader.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/NodeReader.java Thu Jan 29 05:38:59 2015
@@ -37,7 +37,7 @@ import org.apache.nutch.util.FSUtils;
 import org.apache.nutch.util.NutchConfiguration;
 
 /**
- * Reads and prints to system out information for a single node from the NodeDb 
+ * Reads and prints to system out information for a single node from the NodeDb
  * in the WebGraph.
  */
 public class NodeReader extends Configured {
@@ -46,33 +46,35 @@ public class NodeReader extends Configur
   private MapFile.Reader[] nodeReaders;
 
   public NodeReader() {
-    
+
   }
-  
+
   public NodeReader(Configuration conf) {
     super(conf);
   }
-  
+
   /**
    * Prints the content of the Node represented by the url to system out.
    * 
-   * @param webGraphDb The webgraph from which to get the node.
-   * @param url The url of the node.
+   * @param webGraphDb
+   *          The webgraph from which to get the node.
+   * @param url
+   *          The url of the node.
    * 
-   * @throws IOException If an error occurs while getting the node.
+   * @throws IOException
+   *           If an error occurs while getting the node.
    */
-  public void dumpUrl(Path webGraphDb, String url)
-    throws IOException {
+  public void dumpUrl(Path webGraphDb, String url) throws IOException {
 
     fs = FileSystem.get(getConf());
     nodeReaders = MapFileOutputFormat.getReaders(fs, new Path(webGraphDb,
-      WebGraph.NODE_DIR), getConf());
+        WebGraph.NODE_DIR), getConf());
 
     // open the readers, get the node, print out the info, and close the readers
     Text key = new Text(url);
     Node node = new Node();
     MapFileOutputFormat.getEntry(nodeReaders,
-      new HashPartitioner<Text, Node>(), key, node);
+        new HashPartitioner<Text, Node>(), key, node);
     System.out.println(url + ":");
     System.out.println("  inlink score: " + node.getInlinkScore());
     System.out.println("  outlink score: " + node.getOutlinkScore());
@@ -82,25 +84,24 @@ public class NodeReader extends Configur
   }
 
   /**
-   * Runs the NodeReader tool.  The command line arguments must contain a 
-   * webgraphdb path and a url.  The url must match the normalized url that is
+   * Runs the NodeReader tool. The command line arguments must contain a
+   * webgraphdb path and a url. The url must match the normalized url that is
    * contained in the NodeDb of the WebGraph.
    */
-  public static void main(String[] args)
-    throws Exception {
+  public static void main(String[] args) throws Exception {
 
     Options options = new Options();
     OptionBuilder.withArgName("help");
     OptionBuilder.withDescription("show this help message");
     Option helpOpts = OptionBuilder.create("help");
     options.addOption(helpOpts);
-    
+
     OptionBuilder.withArgName("webgraphdb");
     OptionBuilder.hasArg();
     OptionBuilder.withDescription("the webgraphdb to use");
     Option webGraphOpts = OptionBuilder.create("webgraphdb");
     options.addOption(webGraphOpts);
-    
+
     OptionBuilder.withArgName("url");
     OptionBuilder.hasOptionalArg();
     OptionBuilder.withDescription("the url to dump");
@@ -113,7 +114,7 @@ public class NodeReader extends Configur
       // command line must take a webgraphdb and a url
       CommandLine line = parser.parse(options, args);
       if (line.hasOption("help") || !line.hasOption("webgraphdb")
-        || !line.hasOption("url")) {
+          || !line.hasOption("url")) {
         HelpFormatter formatter = new HelpFormatter();
         formatter.printHelp("WebGraphReader", options);
         return;
@@ -124,10 +125,9 @@ public class NodeReader extends Configur
       String url = line.getOptionValue("url");
       NodeReader reader = new NodeReader(NutchConfiguration.create());
       reader.dumpUrl(new Path(webGraphDb), url);
-      
+
       return;
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       e.printStackTrace();
       return;
     }

Modified: nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/ScoreUpdater.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/ScoreUpdater.java?rev=1655526&r1=1655525&r2=1655526&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/ScoreUpdater.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/ScoreUpdater.java Thu Jan 29 05:38:59 2015
@@ -58,13 +58,12 @@ import org.apache.nutch.util.TimingUtil;
 
 /**
  * Updates the score from the WebGraph node database into the crawl database.
- * Any score that is not in the node database is set to the clear score in the 
+ * Any score that is not in the node database is set to the clear score in the
  * crawl database.
  */
-public class ScoreUpdater
-  extends Configured
-  implements Tool, Mapper<Text, Writable, Text, ObjectWritable>,
-  Reducer<Text, ObjectWritable, Text, CrawlDatum> {
+public class ScoreUpdater extends Configured implements Tool,
+    Mapper<Text, Writable, Text, ObjectWritable>,
+    Reducer<Text, ObjectWritable, Text, CrawlDatum> {
 
   public static final Logger LOG = LoggerFactory.getLogger(ScoreUpdater.class);
 
@@ -80,8 +79,8 @@ public class ScoreUpdater
    * Changes input into ObjectWritables.
    */
   public void map(Text key, Writable value,
-    OutputCollector<Text, ObjectWritable> output, Reporter reporter)
-    throws IOException {
+      OutputCollector<Text, ObjectWritable> output, Reporter reporter)
+      throws IOException {
 
     ObjectWritable objWrite = new ObjectWritable();
     objWrite.set(value);
@@ -93,8 +92,8 @@ public class ScoreUpdater
    * with a cleared score.
    */
   public void reduce(Text key, Iterator<ObjectWritable> values,
-    OutputCollector<Text, CrawlDatum> output, Reporter reporter)
-    throws IOException {
+      OutputCollector<Text, CrawlDatum> output, Reporter reporter)
+      throws IOException {
 
     String url = key.toString();
     Node node = null;
@@ -106,34 +105,31 @@ public class ScoreUpdater
       ObjectWritable next = values.next();
       Object value = next.get();
       if (value instanceof Node) {
-        node = (Node)value;
-      }
-      else if (value instanceof CrawlDatum) {
-        datum = (CrawlDatum)value;
+        node = (Node) value;
+      } else if (value instanceof CrawlDatum) {
+        datum = (CrawlDatum) value;
       }
     }
 
-    // datum should never be null, could happen if somehow the url was 
+    // datum should never be null, could happen if somehow the url was
     // normalized or changed after being pulled from the crawldb
     if (datum != null) {
 
       if (node != null) {
-        
+
         // set the inlink score in the nodedb
         float inlinkScore = node.getInlinkScore();
         datum.setScore(inlinkScore);
         LOG.debug(url + ": setting to score " + inlinkScore);
-      }
-      else {
-        
+      } else {
+
         // clear out the score in the crawldb
         datum.setScore(clearScore);
         LOG.debug(url + ": setting to clear score of " + clearScore);
       }
 
       output.collect(key, datum);
-    }
-    else {
+    } else {
       LOG.debug(url + ": no datum");
     }
   }
@@ -142,16 +138,18 @@ public class ScoreUpdater
   }
 
   /**
-   * Updates the inlink score in the web graph node databsae into the crawl 
+   * Updates the inlink score from the web graph node database into the crawl
    * database.
    * 
-   * @param crawlDb The crawl database to update
-   * @param webGraphDb The webgraph database to use.
+   * @param crawlDb
+   *          The crawl database to update
+   * @param webGraphDb
+   *          The webgraph database to use.
    * 
-   * @throws IOException If an error occurs while updating the scores.
+   * @throws IOException
+   *           If an error occurs while updating the scores.
    */
-  public void update(Path crawlDb, Path webGraphDb)
-    throws IOException {
+  public void update(Path crawlDb, Path webGraphDb) throws IOException {
 
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
     long start = System.currentTimeMillis();
@@ -164,8 +162,8 @@ public class ScoreUpdater
     LOG.info("Running crawldb update " + crawlDb);
     Path nodeDb = new Path(webGraphDb, WebGraph.NODE_DIR);
     Path crawlDbCurrent = new Path(crawlDb, CrawlDb.CURRENT_NAME);
-    Path newCrawlDb = new Path(crawlDb,
-      Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
+    Path newCrawlDb = new Path(crawlDb, Integer.toString(new Random()
+        .nextInt(Integer.MAX_VALUE)));
 
     // run the updater job outputting to the temp crawl database
     JobConf updater = new NutchJob(conf);
@@ -184,10 +182,9 @@ public class ScoreUpdater
 
     try {
       JobClient.runJob(updater);
-    }
-    catch (IOException e) {
+    } catch (IOException e) {
       LOG.error(StringUtils.stringifyException(e));
-      
+
       // remove the temp crawldb on error
       if (fs.exists(newCrawlDb)) {
         fs.delete(newCrawlDb, true);
@@ -200,34 +197,33 @@ public class ScoreUpdater
     CrawlDb.install(updater, crawlDb);
 
     long end = System.currentTimeMillis();
-    LOG.info("ScoreUpdater: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
+    LOG.info("ScoreUpdater: finished at " + sdf.format(end) + ", elapsed: "
+        + TimingUtil.elapsedTime(start, end));
   }
 
-  public static void main(String[] args)
-    throws Exception {
+  public static void main(String[] args) throws Exception {
     int res = ToolRunner.run(NutchConfiguration.create(), new ScoreUpdater(),
-      args);
+        args);
     System.exit(res);
   }
 
   /**
    * Runs the ScoreUpdater tool.
    */
-  public int run(String[] args)
-    throws Exception {
+  public int run(String[] args) throws Exception {
 
     Options options = new Options();
     OptionBuilder.withArgName("help");
     OptionBuilder.withDescription("show this help message");
     Option helpOpts = OptionBuilder.create("help");
     options.addOption(helpOpts);
-    
+
     OptionBuilder.withArgName("crawldb");
     OptionBuilder.hasArg();
     OptionBuilder.withDescription("the crawldb to use");
     Option crawlDbOpts = OptionBuilder.create("crawldb");
     options.addOption(crawlDbOpts);
-    
+
     OptionBuilder.withArgName("webgraphdb");
     OptionBuilder.hasArg();
     OptionBuilder.withDescription("the webgraphdb to use");
@@ -239,7 +235,7 @@ public class ScoreUpdater
 
       CommandLine line = parser.parse(options, args);
       if (line.hasOption("help") || !line.hasOption("webgraphdb")
-        || !line.hasOption("crawldb")) {
+          || !line.hasOption("crawldb")) {
         HelpFormatter formatter = new HelpFormatter();
         formatter.printHelp("ScoreUpdater", options);
         return -1;
@@ -249,8 +245,7 @@ public class ScoreUpdater
       String webGraphDb = line.getOptionValue("webgraphdb");
       update(new Path(crawlDb), new Path(webGraphDb));
       return 0;
-    }
-    catch (Exception e) {
+    } catch (Exception e) {
       LOG.error("ScoreUpdater: " + StringUtils.stringifyException(e));
       return -1;
     }