You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2010/07/21 20:05:57 UTC

svn commit: r966337 - in /nutch/branches/nutchbase/src/test/org/apache/nutch: crawl/TestGenerator.java crawl/TestInjector.java fetcher/TestFetcher.java util/AbstractNutchTest.java util/CrawlTestUtil.java

Author: ab
Date: Wed Jul 21 18:05:56 2010
New Revision: 966337

URL: http://svn.apache.org/viewvc?rev=966337&view=rev
Log:
Move around some methods...

Modified:
    nutch/branches/nutchbase/src/test/org/apache/nutch/crawl/TestGenerator.java
    nutch/branches/nutchbase/src/test/org/apache/nutch/crawl/TestInjector.java
    nutch/branches/nutchbase/src/test/org/apache/nutch/fetcher/TestFetcher.java
    nutch/branches/nutchbase/src/test/org/apache/nutch/util/AbstractNutchTest.java
    nutch/branches/nutchbase/src/test/org/apache/nutch/util/CrawlTestUtil.java

Modified: nutch/branches/nutchbase/src/test/org/apache/nutch/crawl/TestGenerator.java
URL: http://svn.apache.org/viewvc/nutch/branches/nutchbase/src/test/org/apache/nutch/crawl/TestGenerator.java?rev=966337&r1=966336&r2=966337&view=diff
==============================================================================
--- nutch/branches/nutchbase/src/test/org/apache/nutch/crawl/TestGenerator.java (original)
+++ nutch/branches/nutchbase/src/test/org/apache/nutch/crawl/TestGenerator.java Wed Jul 21 18:05:56 2010
@@ -26,6 +26,7 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.nutch.storage.Mark;
 import org.apache.nutch.storage.WebPage;
 import org.apache.nutch.util.AbstractNutchTest;
+import org.apache.nutch.util.CrawlTestUtil;
 import org.apache.nutch.util.TableUtil;
 
 /**
@@ -68,7 +69,7 @@ public class TestGenerator extends Abstr
 
     generateFetchlist(NUM_RESULTS, conf, false);
 
-    ArrayList<URLWebPage> l = readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
+    ArrayList<URLWebPage> l = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
 
     // sort urls by score desc
     Collections.sort(l, new ScoreComparator());
@@ -127,7 +128,7 @@ public class TestGenerator extends Abstr
     myConfiguration.set(GeneratorJob.GENERATOR_COUNT_MODE, GeneratorJob.GENERATOR_COUNT_VALUE_HOST);
     generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);
 
-    ArrayList<URLWebPage> fetchList = readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
+    ArrayList<URLWebPage> fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
 
     // verify we got right amount of records
     assertEquals(1, fetchList.size());
@@ -136,7 +137,7 @@ public class TestGenerator extends Abstr
     myConfiguration.setInt(GeneratorJob.GENERATOR_MAX_COUNT, 2);
     generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);
 
-    fetchList = readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
+    fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
 
     // verify we got right amount of records
     assertEquals(2, fetchList.size());
@@ -145,7 +146,7 @@ public class TestGenerator extends Abstr
     myConfiguration.setInt(GeneratorJob.GENERATOR_MAX_COUNT, 3);
     generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);
 
-    fetchList = readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
+    fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
 
     // verify we got right amount of records
     assertEquals(3, fetchList.size());
@@ -178,7 +179,7 @@ public class TestGenerator extends Abstr
 
     generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);
 
-    ArrayList<URLWebPage> fetchList = readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
+    ArrayList<URLWebPage> fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
 
     // verify we got right amount of records
     assertEquals(1, fetchList.size());
@@ -187,7 +188,7 @@ public class TestGenerator extends Abstr
     myConfiguration.setInt(GeneratorJob.GENERATOR_MAX_COUNT, 2);
     generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);
 
-    fetchList = readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
+    fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
 
     // verify we got right amount of records
     assertEquals(2, fetchList.size());
@@ -196,7 +197,7 @@ public class TestGenerator extends Abstr
     myConfiguration.setInt(GeneratorJob.GENERATOR_MAX_COUNT, 3);
     generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);
 
-    fetchList = readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
+    fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
 
     // verify we got right amount of records
     assertEquals(3, fetchList.size());
@@ -226,13 +227,13 @@ public class TestGenerator extends Abstr
 
     generateFetchlist(Integer.MAX_VALUE, myConfiguration, true);
 
-    ArrayList<URLWebPage> fetchList = readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
+    ArrayList<URLWebPage> fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
 
     assertEquals(0, fetchList.size());
 
     generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);
 
-    fetchList = readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
+    fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
 
     // verify nothing got filtered
     assertEquals(list.size(), fetchList.size());

Modified: nutch/branches/nutchbase/src/test/org/apache/nutch/crawl/TestInjector.java
URL: http://svn.apache.org/viewvc/nutch/branches/nutchbase/src/test/org/apache/nutch/crawl/TestInjector.java?rev=966337&r1=966336&r2=966337&view=diff
==============================================================================
--- nutch/branches/nutchbase/src/test/org/apache/nutch/crawl/TestInjector.java (original)
+++ nutch/branches/nutchbase/src/test/org/apache/nutch/crawl/TestInjector.java Wed Jul 21 18:05:56 2010
@@ -112,7 +112,7 @@ public class TestInjector extends Abstra
   };
   
   private List<String> readDb() throws Exception {
-    List<URLWebPage> pages = readContents(webPageStore, null, fields);
+    List<URLWebPage> pages = CrawlTestUtil.readContents(webPageStore, null, fields);
     ArrayList<String> read = new ArrayList<String>();
     for (URLWebPage up : pages) {
       WebPage page = up.getDatum();

Modified: nutch/branches/nutchbase/src/test/org/apache/nutch/fetcher/TestFetcher.java
URL: http://svn.apache.org/viewvc/nutch/branches/nutchbase/src/test/org/apache/nutch/fetcher/TestFetcher.java?rev=966337&r1=966336&r2=966337&view=diff
==============================================================================
--- nutch/branches/nutchbase/src/test/org/apache/nutch/fetcher/TestFetcher.java (original)
+++ nutch/branches/nutchbase/src/test/org/apache/nutch/fetcher/TestFetcher.java Wed Jul 21 18:05:56 2010
@@ -94,7 +94,7 @@ public class TestFetcher extends Abstrac
         conf.getFloat("fetcher.server.delay", 5));
     assertTrue(time > minimumTime);
     
-    List<URLWebPage> pages = readContents(webPageStore, Mark.FETCH_MARK, null);
+    List<URLWebPage> pages = CrawlTestUtil.readContents(webPageStore, Mark.FETCH_MARK, (String[])null);
     assertEquals(urls.size(), pages.size());
     List<String> handledurls = new ArrayList<String>();
     for (URLWebPage up : pages) {

Modified: nutch/branches/nutchbase/src/test/org/apache/nutch/util/AbstractNutchTest.java
URL: http://svn.apache.org/viewvc/nutch/branches/nutchbase/src/test/org/apache/nutch/util/AbstractNutchTest.java?rev=966337&r1=966336&r2=966337&view=diff
==============================================================================
--- nutch/branches/nutchbase/src/test/org/apache/nutch/util/AbstractNutchTest.java (original)
+++ nutch/branches/nutchbase/src/test/org/apache/nutch/util/AbstractNutchTest.java Wed Jul 21 18:05:56 2010
@@ -39,11 +39,8 @@ import org.gora.store.DataStoreFactory;
 import org.gora.util.ByteUtils;
 
 /**
- * Basic injector test: 1. Creates a text file with urls 2. Injects them into
- * crawldb 3. Reads crawldb entries and verifies contents 4. Injects more urls
- * into webdb 5. Reads crawldb entries and verifies contents
- * 
- * @author nutch-dev <nutch-dev at lucene.apache.org>
+ * This class provides common routines for setup/teardown of an in-memory data
+ * store.
  */
 public class AbstractNutchTest extends TestCase {
 
@@ -85,35 +82,4 @@ public class AbstractNutchTest extends T
     fs.delete(testdir, true);
   }
 
-  /**
-   * Read entries from a data store
-   *
-   * @return list of matching {@link URLWebPage} objects
-   * @throws IOException
-   */
-  public static ArrayList<URLWebPage> readContents(DataStore<String,WebPage> store,
-      Mark requiredMark, String... fields) throws IOException {
-    ArrayList<URLWebPage> l = new ArrayList<URLWebPage>();
-
-    Query<String, WebPage> query = store.newQuery();
-    if (fields != null) {
-      query.setFields(fields);
-    }
-
-    Result<String, WebPage> results = store.execute(query);
-    while (results.next()) {
-      WebPage page = results.get();
-      String url = results.getKey();
-
-      if (page == null)
-        continue;
-
-      if (requiredMark != null && requiredMark.checkMark(page) == null)
-        continue;
-
-      l.add(new URLWebPage(TableUtil.unreverseUrl(url), (WebPage)page.clone()));
-    }
-
-    return l;
-  }
 }

Modified: nutch/branches/nutchbase/src/test/org/apache/nutch/util/CrawlTestUtil.java
URL: http://svn.apache.org/viewvc/nutch/branches/nutchbase/src/test/org/apache/nutch/util/CrawlTestUtil.java?rev=966337&r1=966336&r2=966337&view=diff
==============================================================================
--- nutch/branches/nutchbase/src/test/org/apache/nutch/util/CrawlTestUtil.java (original)
+++ nutch/branches/nutchbase/src/test/org/apache/nutch/util/CrawlTestUtil.java Wed Jul 21 18:05:56 2010
@@ -18,6 +18,7 @@ package org.apache.nutch.util;
 
 import java.io.IOException;
 import java.net.UnknownHostException;
+import java.util.ArrayList;
 import java.util.Iterator;
 import java.util.List;
 
@@ -27,6 +28,12 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.nutch.crawl.URLWebPage;
+import org.apache.nutch.storage.Mark;
+import org.apache.nutch.storage.WebPage;
+import org.gora.query.Query;
+import org.gora.query.Result;
+import org.gora.store.DataStore;
 import org.mortbay.jetty.Handler;
 import org.mortbay.jetty.Server;
 import org.mortbay.jetty.handler.DefaultHandler;
@@ -88,6 +95,39 @@ public class CrawlTestUtil {
     out.flush();
     out.close();
   }
+  
+  /**
+   * Read entries from a data store
+   *
+   * @return list of matching {@link URLWebPage} objects
+   * @throws IOException
+   */
+  public static ArrayList<URLWebPage> readContents(DataStore<String,WebPage> store,
+      Mark requiredMark, String... fields) throws IOException {
+    ArrayList<URLWebPage> l = new ArrayList<URLWebPage>();
+
+    Query<String, WebPage> query = store.newQuery();
+    if (fields != null) {
+      query.setFields(fields);
+    }
+
+    Result<String, WebPage> results = store.execute(query);
+    while (results.next()) {
+      WebPage page = results.get();
+      String url = results.getKey();
+
+      if (page == null)
+        continue;
+
+      if (requiredMark != null && requiredMark.checkMark(page) == null)
+        continue;
+
+      l.add(new URLWebPage(TableUtil.unreverseUrl(url), (WebPage)page.clone()));
+    }
+
+    return l;
+  }
+
 
   /**
    * Creates a new JettyServer with one static root context