You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ma...@apache.org on 2016/06/03 13:02:35 UTC
nutch git commit: NUTCH-2272 Index checker server to optionally keep client connection open
Repository: nutch
Updated Branches:
refs/heads/master 7956daee8 -> beb48a84b
NUTCH-2272 Index checker server to optionally keep client connection open
Project: http://git-wip-us.apache.org/repos/asf/nutch/repo
Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/beb48a84
Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/beb48a84
Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/beb48a84
Branch: refs/heads/master
Commit: beb48a84b2be52f92af24956ae59286ad116913c
Parents: 7956dae
Author: Markus Jelsma <ma...@apache.org>
Authored: Fri Jun 3 15:02:12 2016 +0200
Committer: Markus Jelsma <ma...@apache.org>
Committed: Fri Jun 3 15:02:12 2016 +0200
----------------------------------------------------------------------
CHANGES.txt | 1 +
.../nutch/indexer/IndexingFiltersChecker.java | 35 ++++++++++++++------
2 files changed, 25 insertions(+), 11 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/nutch/blob/beb48a84/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index ffcf5ae..877f23b 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -37,6 +37,7 @@ Bug
Improvement
+ [NUTCH-2272] - Index checker server to optionally keep client connection open
[NUTCH-1233] - Rely on Tika for outlink extraction
[NUTCH-1712] - Use MultipleInputs in Injector to make it a single mapreduce job
[NUTCH-2172] - index-more: document format of contenttype-mapping.txt
http://git-wip-us.apache.org/repos/asf/nutch/blob/beb48a84/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java b/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java
index da4123f..2e1b9c2 100644
--- a/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java
+++ b/src/java/org/apache/nutch/indexer/IndexingFiltersChecker.java
@@ -69,6 +69,7 @@ public class IndexingFiltersChecker extends Configured implements Tool {
protected URLNormalizers normalizers = null;
protected boolean dumpText = false;
protected boolean followRedirects = false;
+ protected boolean keepClientCnxOpen = false;
// used to simulate the metadata propagated from injection
protected HashMap<String, String> metadata = new HashMap<String, String>();
protected int tcpPort = -1;
@@ -82,7 +83,7 @@ public class IndexingFiltersChecker extends Configured implements Tool {
public int run(String[] args) throws Exception {
String url = null;
- String usage = "Usage: IndexingFiltersChecker [-normalize] [-followRedirects] [-dumpText] [-md key=value] [-listen <port>] <url>";
+ String usage = "Usage: IndexingFiltersChecker [-normalize] [-followRedirects] [-dumpText] [-md key=value] [-listen <port>] [-keepClientCnxOpen]";
if (args.length == 0) {
System.err.println(usage);
@@ -96,6 +97,8 @@ public class IndexingFiltersChecker extends Configured implements Tool {
tcpPort = Integer.parseInt(args[++i]);
} else if (args[i].equals("-followRedirects")) {
followRedirects = true;
+ } else if (args[i].equals("-keepClientCnxOpen")) {
+ keepClientCnxOpen = true;
} else if (args[i].equals("-dumpText")) {
dumpText = true;
} else if (args[i].equals("-md")) {
@@ -164,7 +167,23 @@ public class IndexingFiltersChecker extends Configured implements Tool {
LOG.info(client.toString());
}
- public void run(){
+ public void run() {
+ if (keepClientCnxOpen) {
+ while (true) { // keep connection open until closes
+ readWrite();
+ }
+ } else {
+ readWrite();
+
+ try { // close ourselves
+ client.close();
+ } catch (Exception e){
+ LOG.error(e.toString());
+ }
+ }
+ }
+
+ protected void readWrite() {
String line;
BufferedReader in = null;
PrintWriter out = null;
@@ -185,14 +204,6 @@ public class IndexingFiltersChecker extends Configured implements Tool {
}catch (Exception e) {
LOG.error("Read/Write failed: " + e);
}
-
- try {
- client.close();
- } catch (Exception e){
- LOG.error(e.toString());
- }
-
- return;
}
}
@@ -331,6 +342,8 @@ public class IndexingFiltersChecker extends Configured implements Tool {
}
}
}
+
+ output.append("\n"); // For readability if keepClientCnxOpen
if (getConf().getBoolean("doIndex", false) && doc != null) {
IndexWriters writers = new IndexWriters(getConf());
@@ -355,4 +368,4 @@ public class IndexingFiltersChecker extends Configured implements Tool {
new IndexingFiltersChecker(), args);
System.exit(res);
}
-}
+}
\ No newline at end of file