You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ma...@apache.org on 2016/06/03 13:02:35 UTC

nutch git commit: NUTCH-2272 Index checker server to optionally keep client connection open

Repository: nutch
Updated Branches:
  refs/heads/master 7956daee8 -> beb48a84b


NUTCH-2272 Index checker server to optionally keep client connection open


Project: http://git-wip-us.apache.org/repos/asf/nutch/repo
Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/beb48a84
Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/beb48a84
Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/beb48a84

Branch: refs/heads/master
Commit: beb48a84b2be52f92af24956ae59286ad116913c
Parents: 7956dae
Author: Markus Jelsma <ma...@apache.org>
Authored: Fri Jun 3 15:02:12 2016 +0200
Committer: Markus Jelsma <ma...@apache.org>
Committed: Fri Jun 3 15:02:12 2016 +0200

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../nutch/indexer/IndexingFiltersChecker.java   | 35 ++++++++++++++------
 2 files changed, 25 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/nutch/blob/beb48a84/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index ffcf5ae..877f23b 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -37,6 +37,7 @@ Bug
 
 Improvement
 
+    [NUTCH-2272] - Index checker server to optionally keep client connection open
     [NUTCH-1233] - Rely on Tika for outlink extraction
     [NUTCH-1712] - Use MultipleInputs in Injector to make it a single mapreduce job
     [NUTCH-2172] - index-more: document format of contenttype-mapping.txt

http://git-wip-us.apache.org/repos/asf/nutch/blob/beb48a84/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java b/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java
index da4123f..2e1b9c2 100644
--- a/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java
+++ b/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java
@@ -69,6 +69,7 @@ public class IndexingFiltersChecker extends Configured implements Tool {
   protected URLNormalizers normalizers = null;
   protected boolean dumpText = false;
   protected boolean followRedirects = false;
+  protected boolean keepClientCnxOpen = false;
   // used to simulate the metadata propagated from injection
   protected HashMap<String, String> metadata = new HashMap<String, String>();
   protected int tcpPort = -1;
@@ -82,7 +83,7 @@ public class IndexingFiltersChecker extends Configured implements Tool {
 
   public int run(String[] args) throws Exception {
     String url = null;
-    String usage = "Usage: IndexingFiltersChecker [-normalize] [-followRedirects] [-dumpText] [-md key=value] [-listen <port>] <url>";
+    String usage = "Usage: IndexingFiltersChecker [-normalize] [-followRedirects] [-dumpText] [-md key=value] [-listen <port>] [-keepClientCnxOpen]";
 
     if (args.length == 0) {
       System.err.println(usage);
@@ -96,6 +97,8 @@ public class IndexingFiltersChecker extends Configured implements Tool {
         tcpPort = Integer.parseInt(args[++i]);
       } else if (args[i].equals("-followRedirects")) {
         followRedirects = true;
+      } else if (args[i].equals("-keepClientCnxOpen")) {
+        keepClientCnxOpen = true;
       } else if (args[i].equals("-dumpText")) {
         dumpText = true;
       } else if (args[i].equals("-md")) {
@@ -164,7 +167,23 @@ public class IndexingFiltersChecker extends Configured implements Tool {
       LOG.info(client.toString());
     }
 
-    public void run(){
+    public void run() {
+      if (keepClientCnxOpen) {
+        while (true) { // keep connection open until closes
+          readWrite();
+        }
+      } else {
+        readWrite();
+        
+        try { // close ourselves
+          client.close();
+        } catch (Exception e){
+          LOG.error(e.toString());
+        }
+      }
+    }
+    
+    protected void readWrite() {
       String line;
       BufferedReader in = null;
       PrintWriter out = null;
@@ -185,14 +204,6 @@ public class IndexingFiltersChecker extends Configured implements Tool {
       }catch (Exception e) {
         LOG.error("Read/Write failed: " + e);
       }
-      
-      try {
-        client.close();
-      } catch (Exception e){
-        LOG.error(e.toString());
-      }
-      
-      return;
     }
   }
     
@@ -331,6 +342,8 @@ public class IndexingFiltersChecker extends Configured implements Tool {
         }
       }
     }
+    
+    output.append("\n"); // For readability if keepClientCnxOpen
 
     if (getConf().getBoolean("doIndex", false) && doc != null) {
       IndexWriters writers = new IndexWriters(getConf());
@@ -355,4 +368,4 @@ public class IndexingFiltersChecker extends Configured implements Tool {
         new IndexingFiltersChecker(), args);
     System.exit(res);
   }
-}
+}
\ No newline at end of file