You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by jn...@apache.org on 2014/04/16 16:56:09 UTC

svn commit: r1587935 - in /nutch/trunk: CHANGES.txt src/java/org/apache/nutch/parse/ParserChecker.java

Author: jnioche
Date: Wed Apr 16 14:56:09 2014
New Revision: 1587935

URL: http://svn.apache.org/r1587935
Log:
NUTCH-1743 parsechecker to show outlinks

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/java/org/apache/nutch/parse/ParserChecker.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1587935&r1=1587934&r2=1587935&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Wed Apr 16 14:56:09 2014
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Nutch Current Development
 
+* NUTCH-1743 parsechecker to show outlinks (jnioche, snagel)
+
 * NUTCH-1603 ZIP parser complains about truncated PDF file (snagel)
 
 * NUTCH-1720 Duplicate lines in HttpBase.java (Walter Tietze via jnioche)

Modified: nutch/trunk/src/java/org/apache/nutch/parse/ParserChecker.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/parse/ParserChecker.java?rev=1587935&r1=1587934&r2=1587935&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/parse/ParserChecker.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/parse/ParserChecker.java Wed Apr 16 14:56:09 2014
@@ -151,6 +151,12 @@ public class ParserChecker implements To
       System.out.print(entry.getKey());
       LOG.info("\n---------\nParseData\n---------\n");
       System.out.print(parse.getData().toString());
+      LOG.info("---------\nOutlinks\n---------\n");
+      StringBuffer sb = new StringBuffer();
+      for (Outlink l : parse.getData().getOutlinks()) {
+        sb.append("  outlink: ").append(l).append('\n');
+      }
+      System.out.print(sb.toString());
       if (dumpText) {
         LOG.info("---------\nParseText\n---------\n");
         System.out.print(parse.getText());