You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by jn...@apache.org on 2014/04/16 16:58:29 UTC

svn commit: r1587936 - in /nutch/branches/2.x: CHANGES.txt src/java/org/apache/nutch/parse/ParserChecker.java

Author: jnioche
Date: Wed Apr 16 14:58:29 2014
New Revision: 1587936

URL: http://svn.apache.org/r1587936
Log:
NUTCH-1743

Modified:
    nutch/branches/2.x/CHANGES.txt
    nutch/branches/2.x/src/java/org/apache/nutch/parse/ParserChecker.java

Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1587936&r1=1587935&r2=1587936&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Wed Apr 16 14:58:29 2014
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Current Development
 
+* NUTCH-1743 parsechecker to show outlinks (snagel)
+
 * NUTCH-1732 Better cmd line parsing for NutchServer (Fjodor Vershinin via lewismc)
 
 * NUTCH-1751 Empty anchors should not index (Sertac TURKEL via lewismc)

Modified: nutch/branches/2.x/src/java/org/apache/nutch/parse/ParserChecker.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/parse/ParserChecker.java?rev=1587936&r1=1587935&r2=1587936&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/parse/ParserChecker.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/parse/ParserChecker.java Wed Apr 16 14:58:29 2014
@@ -175,6 +175,12 @@ public class ParserChecker implements To
       }
       System.out.print(sb.toString());
     }
+    LOG.info("---------\nOutlinks\n---------\n");
+    sb = new StringBuffer();
+    for (Outlink l : parse.getOutlinks()) {
+      sb.append("  outlink: ").append(l).append('\n');
+    }
+    System.out.print(sb.toString());
     if (dumpText) {
       LOG.info("---------\nParseText\n---------\n");
       System.out.print(parse.getText());