You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2006/06/26 21:38:40 UTC
svn commit: r417285 - in /lucene/nutch/trunk: conf/nutch-default.xml
src/java/org/apache/nutch/searcher/LuceneQueryOptimizer.java
Author: ab
Date: Mon Jun 26 12:38:39 2006
New Revision: 417285
URL: http://svn.apache.org/viewvc?rev=417285&view=rev
Log:
Add an optional mechanism to time limit long-running queries. This helps to
protect search servers from adverse effects of certain resource-intensive
queries.
Development of this functionality was supported by Krugle.net. Thank you!
Modified:
lucene/nutch/trunk/conf/nutch-default.xml
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/LuceneQueryOptimizer.java
Modified: lucene/nutch/trunk/conf/nutch-default.xml
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/conf/nutch-default.xml?rev=417285&r1=417284&r2=417285&view=diff
==============================================================================
--- lucene/nutch/trunk/conf/nutch-default.xml (original)
+++ lucene/nutch/trunk/conf/nutch-default.xml Mon Jun 26 12:38:39 2006
@@ -546,6 +546,27 @@
suffers little.</description>
</property>
+<property>
+ <name>searcher.max.time.tick_count</name>
+ <value>-1</value>
+ <description>If positive value is defined here, limit search time for
+ every request to this number of elapsed ticks (see the tick_length
+ property below). The total maximum time for any search request will be
+ then limited to tick_count * tick_length milliseconds. When search time
+ is exceeded, partial results will be returned, and the total number of
+ hits will be estimated.
+ </description>
+</property>
+
+<property>
+ <name>searcher.max.time.tick_length</name>
+ <value>200</value>
+ <description>The number of milliseconds between ticks. Larger values
+ reduce the timer granularity (precision). Smaller values bring more
+ overhead.
+ </description>
+</property>
+
<!-- URL normalizer properties -->
<property>
Modified: lucene/nutch/trunk/src/java/org/apache/nutch/searcher/LuceneQueryOptimizer.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/searcher/LuceneQueryOptimizer.java?rev=417285&r1=417284&r2=417285&view=diff
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/searcher/LuceneQueryOptimizer.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/searcher/LuceneQueryOptimizer.java Mon Jun 26 12:38:39 2006
@@ -37,32 +37,106 @@
* which do not affect ranking but might otherwise slow search considerably. */
class LuceneQueryOptimizer {
- private static class LimitExceeded extends RuntimeException {
- private int maxDoc;
- public LimitExceeded(int maxDoc) { this.maxDoc = maxDoc; }
+ // This thread provides a pseudo-clock service to all searching
+ // threads, so that they can count elapsed time with less overhead than
+ // repeatedly calling System.currentTimeMillis.
+ private TimerThread timerThread = null;
+
+ private static class TimerThread extends Thread {
+ private int tick;
+ // NOTE: we can avoid explicit synchronization here for several reasons:
+ // * updates to 32-bit-sized variables are atomic
+ // * only single thread modifies this value
+ // * use of volatile keyword ensures that it does not reside in
+ // a register, but in main memory (so that changes are visible to
+ // other threads).
+ // * visibility of changes does not need to be instantanous, we can
+ // afford losing a tick or two.
+ //
+ // See section 17 of the Java Language Specification for details.
+ public volatile int timeCounter = 0;
+
+ boolean running = true;
+
+ public TimerThread(int tick) {
+ super("LQO timer thread");
+ this.tick = tick;
+ this.setDaemon(true);
+ }
+
+ public void run() {
+ while(running) {
+ timeCounter++;
+ try {
+ Thread.sleep(tick);
+ } catch (InterruptedException ie) {};
+ }
+ }
+ }
+
+ private void initTimerThread(int p) {
+ if (timerThread == null || !timerThread.isAlive()) {
+ timerThread = new TimerThread(p);
+ timerThread.start();
+ }
}
+
+ private static class TimeExceeded extends RuntimeException {
+ public long maxTime;
+ private int maxDoc;
+ public TimeExceeded(long maxTime, int maxDoc) {
+ super("Exceeded search time: " + maxTime + " ms.");
+ this.maxTime = maxTime;
+ this.maxDoc = maxDoc;
+ }
+ }
+
private static class LimitedCollector extends TopDocCollector {
private int maxHits;
-
- public LimitedCollector(int numHits, int maxHits) {
+ private int maxTicks;
+ private int startTicks;
+ private TimerThread timer;
+ private int curTicks;
+
+ public LimitedCollector(int numHits, int maxHits, int maxTicks,
+ TimerThread timer) {
super(numHits);
this.maxHits = maxHits;
+ this.maxTicks = maxTicks;
+ this.timer = timer;
+ this.startTicks = timer.timeCounter;
}
public void collect(int doc, float score) {
- if (getTotalHits() >= maxHits)
+ if (maxHits > 0 && getTotalHits() >= maxHits) {
throw new LimitExceeded(doc);
+ }
+ if (timer != null) {
+ curTicks = timer.timeCounter;
+ // overflow check
+ if (curTicks < startTicks) curTicks += Integer.MAX_VALUE;
+ if (curTicks - startTicks > maxTicks) {
+ throw new TimeExceeded(timer.tick * (curTicks - startTicks), doc);
+ }
+ }
super.collect(doc, score);
}
+ } private static class LimitExceeded extends RuntimeException {
+ private int maxDoc;
+ public LimitExceeded(int maxDoc) { this.maxDoc = maxDoc; }
}
-
+
private LinkedHashMap cache; // an LRU cache of QueryFilter
private float threshold;
private int searcherMaxHits;
+ private int tickLength;
+
+ private int maxTickCount;
+
/**
* Construct an optimizer that caches and uses filters for required clauses
* whose boost is zero.
@@ -82,6 +156,11 @@
return size() > cacheSize; // limit size of cache
}
};
+ this.tickLength = conf.getInt("searcher.max.time.tick_length", 200);
+ this.maxTickCount = conf.getInt("searcher.max.time.tick_count", -1);
+ if (this.maxTickCount > 0) {
+ initTimerThread(this.tickLength);
+ }
}
public TopDocs optimize(BooleanQuery original,
@@ -157,22 +236,29 @@
if (sortField == null && !reverse) {
// no hit limit
- if (this.searcherMaxHits <= 0) {
+ if (this.searcherMaxHits <= 0 && timerThread == null) {
return searcher.search(query, filter, numHits);
}
- // hits limited -- use a LimitedCollector
- LimitedCollector collector = new LimitedCollector(numHits, searcherMaxHits);
+ // hits limited in time or in count -- use a LimitedCollector
+ LimitedCollector collector = new LimitedCollector(numHits, searcherMaxHits,
+ maxTickCount, timerThread);
LimitExceeded exceeded = null;
+ TimeExceeded timeExceeded = null;
try {
searcher.search(query, filter, collector);
} catch (LimitExceeded le) {
exceeded = le;
+ } catch (TimeExceeded te) {
+ timeExceeded = te;
}
TopDocs results = collector.topDocs();
if (exceeded != null) { // limit was exceeded
results.totalHits = (int) // must estimate totalHits
(results.totalHits*(searcher.maxDoc()/(float)exceeded.maxDoc));
+ } else if (timeExceeded != null) {
+ // Estimate total hits.
+ results.totalHits = (int)(results.totalHits * (searcher.maxDoc()/(float)timeExceeded.maxDoc));
}
return results;