You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by sn...@apache.org on 2018/03/27 14:40:46 UTC

[nutch] branch master updated (7cb7abd -> bcf64ce)

This is an automated email from the ASF dual-hosted git repository.

snagel pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git.


    from 7cb7abd  Merge pull request #295 from lewismc/NUTCH-2516
     add 133c8dd  fix for NUTCH-2543 contributed by Jurian Broertjes
     new bcf64ce  Merge pull request #303 from sju/NUTCH-2543

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revision
listed as "add" was already present in the repository and has only
been added to this reference.


Summary of changes:
 src/java/org/apache/nutch/crawl/CrawlDbReader.java | 43 +++++++++++---
 src/java/org/apache/nutch/crawl/LinkDbReader.java  | 65 ++++++++++++++--------
 .../org/apache/nutch/util/AbstractChecker.java     | 48 +++++++++-------
 3 files changed, 107 insertions(+), 49 deletions(-)

-- 
To stop receiving notification emails like this one, please contact
snagel@apache.org.

[nutch] 01/01: Merge pull request #303 from sju/NUTCH-2543

Posted by sn...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

snagel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git

commit bcf64cec92340d7a74cfb02f7ce36de5228034ce
Merge: 7cb7abd 133c8dd
Author: Sebastian Nagel <sn...@apache.org>
AuthorDate: Tue Mar 27 16:40:44 2018 +0200

    Merge pull request #303 from sju/NUTCH-2543
    
    fix for NUTCH-2543 contributed by Jurian Broertjes

 src/java/org/apache/nutch/crawl/CrawlDbReader.java | 43 +++++++++++---
 src/java/org/apache/nutch/crawl/LinkDbReader.java  | 65 ++++++++++++++--------
 .../org/apache/nutch/util/AbstractChecker.java     | 48 +++++++++-------
 3 files changed, 107 insertions(+), 49 deletions(-)

diff --cc src/java/org/apache/nutch/crawl/LinkDbReader.java
index bf537b7,f5daf4d..8efaf0a
--- a/src/java/org/apache/nutch/crawl/LinkDbReader.java
+++ b/src/java/org/apache/nutch/crawl/LinkDbReader.java
@@@ -43,11 -40,10 +43,12 @@@ import org.apache.hadoop.mapreduce.lib.
  import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
  import org.apache.hadoop.mapreduce.Partitioner;
  import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;
 -import org.apache.hadoop.util.*;
 +import org.apache.hadoop.util.StringUtils;
 +import org.apache.hadoop.util.Tool;
 +import org.apache.hadoop.util.ToolRunner;
  import org.apache.hadoop.conf.Configuration;
  
+ import org.apache.nutch.util.AbstractChecker;
  import org.apache.nutch.util.NutchConfiguration;
  import org.apache.nutch.util.NutchJob;
  import org.apache.nutch.util.TimingUtil;
@@@ -172,10 -168,25 +173,25 @@@ public class LinkDbReader extends Abstr
      }
  
      long end = System.currentTimeMillis();
 -    LOG.info("LinkDb dump: finished at " + sdf.format(end) + ", elapsed: "
 -        + TimingUtil.elapsedTime(start, end));
 +    LOG.info("LinkDb dump: finished at {}, elapsed: {}",
 +            sdf.format(end), TimingUtil.elapsedTime(start, end));
    }
  
+   protected int process(String line, StringBuilder output) throws Exception {
+ 
+     Inlinks links = getInlinks(new Text(line));
+     if (links == null) {
+       output.append(" - no link information.");
+     } else {
+       Iterator<Inlink> it = links.iterator();
+       while (it.hasNext()) {
+         output.append(it.next().toString());
+       }
+     }
+     output.append("\n");
+     return 0;
+   }
+ 
    public static void main(String[] args) throws Exception {
      int res = ToolRunner.run(NutchConfiguration.create(), new LinkDbReader(),
          args);

-- 
To stop receiving notification emails like this one, please contact
snagel@apache.org.