You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ma...@apache.org on 2014/06/17 16:23:49 UTC

svn commit: r1603185 - in /nutch/trunk: CHANGES.txt src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java

Author: markus
Date: Tue Jun 17 14:23:49 2014
New Revision: 1603185

URL: http://svn.apache.org/r1603185
Log:
NUTCH-1794 IndexingFiltersChecker to optionally dumpText

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1603185&r1=1603184&r2=1603185&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Tue Jun 17 14:23:49 2014
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Nutch Current Development
 
+* NUTCH-1794 IndexingFilterChecker to optionally dumpText (markus)
+
 * NUTCH-1590 [SECURITY] Frame injection vulnerability in published Javadoc (jnioche)
 
 * NUTCH-1793 HttpRobotRulesParser not configured properly (jnioche)

Modified: nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java?rev=1603185&r1=1603184&r2=1603185&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java Tue Jun 17 14:23:49 2014
@@ -63,15 +63,25 @@ public class IndexingFiltersChecker exte
   public int run(String[] args) throws Exception {
     String contentType = null;
     String url = null;
+    boolean dumpText = false;
 
-    String usage = "Usage: IndexingFiltersChecker <url>";
+    String usage = "Usage: IndexingFiltersChecker [-dumpText] <url>";
 
-    if (args.length != 1) {
+    if (args.length == 0) {
       System.err.println(usage);
       return -1;
     }
 
-    url = URLUtil.toASCII(args[0]);
+    for (int i = 0; i < args.length; i++) {
+      if (args[i].equals("-dumpText")) {
+        dumpText = true;
+      } else if (i != args.length - 1) {
+        System.err.println(usage);
+        System.exit(-1);
+      } else {
+        url = URLUtil.toASCII(args[i]);
+      }
+    }
 
     if (LOG.isInfoEnabled()) {
       LOG.info("fetching: " + url);
@@ -148,7 +158,7 @@ public class IndexingFiltersChecker exte
       if (values != null) {
         for (Object value : values) {
           String str = value.toString();
-          int minText = Math.min(100, str.length());
+          int minText = dumpText ? str.length() : Math.min(100, str.length());
           System.out.println(fname + " :\t" + str.substring(0, minText));
         }
       }