You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ku...@apache.org on 2008/12/02 15:55:39 UTC

svn commit: r722481 - in /lucene/nutch/trunk: CHANGES.txt src/java/org/apache/nutch/tools/SearchLoadTester.java

Author: kubes
Date: Tue Dec  2 06:55:38 2008
New Revision: 722481

URL: http://svn.apache.org/viewvc?rev=722481&view=rev
Log:
NUTCH-665: Search Load Testing Tool

Added:
    lucene/nutch/trunk/src/java/org/apache/nutch/tools/SearchLoadTester.java
Modified:
    lucene/nutch/trunk/CHANGES.txt

Modified: lucene/nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/CHANGES.txt?rev=722481&r1=722480&r2=722481&view=diff
==============================================================================
--- lucene/nutch/trunk/CHANGES.txt (original)
+++ lucene/nutch/trunk/CHANGES.txt Tue Dec  2 06:55:38 2008
@@ -289,6 +289,8 @@
 106. NUTCH-663 - Upgrade Nutch to use Hadoop 0.19 (kubes)
 
 107. NUTCH-647 - Resolve URLs tool (kubes)
+
+108. NUTCH-665 - Search Load Testing Tool (kubes)
      
 Release 0.9 - 2007-04-02
 

Added: lucene/nutch/trunk/src/java/org/apache/nutch/tools/SearchLoadTester.java
URL: http://svn.apache.org/viewvc/lucene/nutch/trunk/src/java/org/apache/nutch/tools/SearchLoadTester.java?rev=722481&view=auto
==============================================================================
--- lucene/nutch/trunk/src/java/org/apache/nutch/tools/SearchLoadTester.java (added)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/tools/SearchLoadTester.java Tue Dec  2 06:55:38 2008
@@ -0,0 +1,182 @@
+package org.apache.nutch.tools;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.GnuParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Option;
+import org.apache.commons.cli.OptionBuilder;
+import org.apache.commons.cli.Options;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.util.StringUtils;
+import org.apache.nutch.searcher.Hits;
+import org.apache.nutch.searcher.NutchBean;
+import org.apache.nutch.searcher.Query;
+import org.apache.nutch.util.NutchConfiguration;
+
+/**
+ * <p>A simple tool to perform load testing on configured search servers.  A
+ * queries file can be specified with a list of different queries to run against
+ * the search servers.  The number of threads used to perform concurrent
+ * searches is also configurable.</p>
+ *
+ * <p>This tool will output approximate times for running all queries in the
+ * queries file.  If configured it will also print out individual query times
+ * to standard output.</p>
+ */
+public class SearchLoadTester {
+
+  public static final Log LOG = LogFactory.getLog(SearchLoadTester.class);
+
+  private String queriesFile = null;
+  private int numThreads = 100;
+  private boolean showTimes = false;
+  private ExecutorService pool = null;
+  private static AtomicInteger numTotal = new AtomicInteger(0);
+  private static AtomicInteger numErrored = new AtomicInteger(0);
+  private static AtomicInteger numResolved = new AtomicInteger(0);
+  private static AtomicLong totalTime = new AtomicLong(0L);
+
+  private static Configuration conf = null;
+  private static NutchBean bean = null;
+
+  /** Runs one query against the shared search bean and records its timing. */
+  private static class SearchThread implements Runnable {
+    private final String query;
+    private final boolean showTimes;
+
+    public SearchThread(String query, boolean showTimes) {
+      this.query = query;
+      this.showTimes = showTimes;
+    }
+
+    public void run() {
+      numTotal.incrementAndGet();
+      try {
+        Query runner = Query.parse(query, conf);
+        long start = System.currentTimeMillis();
+        Hits hits = bean.search(runner, 10);
+        long total = System.currentTimeMillis() - start;
+        numResolved.incrementAndGet();
+        if (showTimes) {
+          System.out.println("Query for " + query + " numhits "
+            + hits.getTotal() + " in " + total + "ms");
+        }
+        totalTime.addAndGet(total);
+      }
+      catch (Exception e) {
+        // log the cause as well so that failed queries can be diagnosed
+        LOG.info("Error executing search for " + query, e);
+        numErrored.incrementAndGet();
+      }
+    }
+  }
+
+  /**
+   * Submits each line of the queries file as a search to a fixed size thread
+   * pool, waits up to 60 seconds for them to finish and logs the totals.
+   */
+  public void testSearch() {
+    try {
+      // create a thread pool with a fixed number of threads
+      pool = Executors.newFixedThreadPool(numThreads);
+      // read in the queries file and loop through each line, one query per line
+      BufferedReader buffRead = new BufferedReader(new FileReader(queriesFile));
+      try {
+        String queryStr = null;
+        while ((queryStr = buffRead.readLine()) != null) {
+          pool.execute(new SearchThread(queryStr, showTimes));
+        }
+      }
+      finally {
+        // release the file even if reading or submitting a query fails
+        buffRead.close();
+      }
+
+      // wait for up to 60 seconds to give the searches time to finish
+      pool.shutdown();
+      if (!pool.awaitTermination(60, TimeUnit.SECONDS)) {
+        LOG.warn("Searches still running after 60 seconds, totals are partial");
+      }
+      // average over resolved queries only and never divide by zero
+      int resolved = numResolved.get();
+      long average = (resolved > 0) ? totalTime.get() / resolved : 0L;
+      LOG.info("Total Queries: " + numTotal.get() + ", Errored: "
+        + numErrored.get() + ", Total Time: " + totalTime.get()
+        + ", Average Time: " + average
+        + " with " + numThreads + " threads");
+    }
+    catch (Exception e) {
+      if (e instanceof InterruptedException) {
+        Thread.currentThread().interrupt();
+      }
+      // on error shutdown the pool, which is still null if creating it failed
+      if (pool != null) {
+        pool.shutdownNow();
+      }
+      LOG.error(StringUtils.stringifyException(e));
+    }
+  }
+
+  /** Creates a tester that uses 100 threads and hides individual times. */
+  public SearchLoadTester(String queriesFile)
+    throws IOException {
+    this(queriesFile, 100, false);
+  }
+
+  /** Creates a tester along with the configuration and search bean it uses. */
+  public SearchLoadTester(String queriesFile, int numThreads, boolean showTimes)
+    throws IOException {
+    this.queriesFile = queriesFile;
+    this.numThreads = numThreads;
+    this.showTimes = showTimes;
+    // conf and bean are static fields, so do not assign them through this
+    conf = NutchConfiguration.create();
+    bean = new NutchBean(conf);
+  }
+
+  /** Parses the command line options and runs the load test. */
+  public static void main(String[] args) {
+    Options options = new Options();
+    options.addOption(OptionBuilder.withArgName("help").withDescription(
+      "show this help message").create("help"));
+    options.addOption(OptionBuilder.withArgName("queries").hasArg().withDescription(
+      "the queries file to test").create("queries"));
+    // hasArg rather than hasArgs, the thread count is a single value
+    options.addOption(OptionBuilder.withArgName("numThreads").hasArg().withDescription(
+      "the number of threads to use").create("numThreads"));
+    options.addOption(OptionBuilder.withArgName("showTimes").withDescription(
+      "show individual query times").create("showTimes"));
+    CommandLineParser parser = new GnuParser();
+    try {
+      // parse out command line arguments
+      CommandLine line = parser.parse(options, args);
+      if (line.hasOption("help") || !line.hasOption("queries")) {
+        HelpFormatter formatter = new HelpFormatter();
+        formatter.printHelp("SearchTester", options);
+        return;
+      }
+      // get the queries file and the number of threads and start the tester
+      boolean showTimes = line.hasOption("showTimes");
+      String queries = line.getOptionValue("queries");
+      int numThreads = Integer.parseInt(line.getOptionValue("numThreads", "10"));
+      SearchLoadTester tester = new SearchLoadTester(queries, numThreads, showTimes);
+      tester.testSearch();
+    }
+    catch (Exception e) {
+      LOG.fatal("SearchTester: " + StringUtils.stringifyException(e));
+    }
+  }
+}