You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by ab...@apache.org on 2010/07/21 20:05:57 UTC
svn commit: r966337 - in /nutch/branches/nutchbase/src/test/org/apache/nutch:
crawl/TestGenerator.java crawl/TestInjector.java fetcher/TestFetcher.java
util/AbstractNutchTest.java util/CrawlTestUtil.java
Author: ab
Date: Wed Jul 21 18:05:56 2010
New Revision: 966337
URL: http://svn.apache.org/viewvc?rev=966337&view=rev
Log:
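Move around some methods: readContents() moves from AbstractNutchTest to CrawlTestUtil, and the tests that call it (TestGenerator, TestInjector, TestFetcher) are updated to match.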
Modified:
nutch/branches/nutchbase/src/test/org/apache/nutch/crawl/TestGenerator.java
nutch/branches/nutchbase/src/test/org/apache/nutch/crawl/TestInjector.java
nutch/branches/nutchbase/src/test/org/apache/nutch/fetcher/TestFetcher.java
nutch/branches/nutchbase/src/test/org/apache/nutch/util/AbstractNutchTest.java
nutch/branches/nutchbase/src/test/org/apache/nutch/util/CrawlTestUtil.java
Modified: nutch/branches/nutchbase/src/test/org/apache/nutch/crawl/TestGenerator.java
URL: http://svn.apache.org/viewvc/nutch/branches/nutchbase/src/test/org/apache/nutch/crawl/TestGenerator.java?rev=966337&r1=966336&r2=966337&view=diff
==============================================================================
--- nutch/branches/nutchbase/src/test/org/apache/nutch/crawl/TestGenerator.java (original)
+++ nutch/branches/nutchbase/src/test/org/apache/nutch/crawl/TestGenerator.java Wed Jul 21 18:05:56 2010
@@ -26,6 +26,7 @@ import org.apache.hadoop.conf.Configurat
import org.apache.nutch.storage.Mark;
import org.apache.nutch.storage.WebPage;
import org.apache.nutch.util.AbstractNutchTest;
+import org.apache.nutch.util.CrawlTestUtil;
import org.apache.nutch.util.TableUtil;
/**
@@ -68,7 +69,7 @@ public class TestGenerator extends Abstr
generateFetchlist(NUM_RESULTS, conf, false);
- ArrayList<URLWebPage> l = readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
+ ArrayList<URLWebPage> l = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
// sort urls by score desc
Collections.sort(l, new ScoreComparator());
@@ -127,7 +128,7 @@ public class TestGenerator extends Abstr
myConfiguration.set(GeneratorJob.GENERATOR_COUNT_MODE, GeneratorJob.GENERATOR_COUNT_VALUE_HOST);
generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);
- ArrayList<URLWebPage> fetchList = readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
+ ArrayList<URLWebPage> fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
// verify we got right amount of records
assertEquals(1, fetchList.size());
@@ -136,7 +137,7 @@ public class TestGenerator extends Abstr
myConfiguration.setInt(GeneratorJob.GENERATOR_MAX_COUNT, 2);
generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);
- fetchList = readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
+ fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
// verify we got right amount of records
assertEquals(2, fetchList.size());
@@ -145,7 +146,7 @@ public class TestGenerator extends Abstr
myConfiguration.setInt(GeneratorJob.GENERATOR_MAX_COUNT, 3);
generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);
- fetchList = readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
+ fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
// verify we got right amount of records
assertEquals(3, fetchList.size());
@@ -178,7 +179,7 @@ public class TestGenerator extends Abstr
generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);
- ArrayList<URLWebPage> fetchList = readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
+ ArrayList<URLWebPage> fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
// verify we got right amount of records
assertEquals(1, fetchList.size());
@@ -187,7 +188,7 @@ public class TestGenerator extends Abstr
myConfiguration.setInt(GeneratorJob.GENERATOR_MAX_COUNT, 2);
generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);
- fetchList = readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
+ fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
// verify we got right amount of records
assertEquals(2, fetchList.size());
@@ -196,7 +197,7 @@ public class TestGenerator extends Abstr
myConfiguration.setInt(GeneratorJob.GENERATOR_MAX_COUNT, 3);
generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);
- fetchList = readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
+ fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
// verify we got right amount of records
assertEquals(3, fetchList.size());
@@ -226,13 +227,13 @@ public class TestGenerator extends Abstr
generateFetchlist(Integer.MAX_VALUE, myConfiguration, true);
- ArrayList<URLWebPage> fetchList = readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
+ ArrayList<URLWebPage> fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
assertEquals(0, fetchList.size());
generateFetchlist(Integer.MAX_VALUE, myConfiguration, false);
- fetchList = readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
+ fetchList = CrawlTestUtil.readContents(webPageStore, Mark.GENERATE_MARK, FIELDS);
// verify nothing got filtered
assertEquals(list.size(), fetchList.size());
Modified: nutch/branches/nutchbase/src/test/org/apache/nutch/crawl/TestInjector.java
URL: http://svn.apache.org/viewvc/nutch/branches/nutchbase/src/test/org/apache/nutch/crawl/TestInjector.java?rev=966337&r1=966336&r2=966337&view=diff
==============================================================================
--- nutch/branches/nutchbase/src/test/org/apache/nutch/crawl/TestInjector.java (original)
+++ nutch/branches/nutchbase/src/test/org/apache/nutch/crawl/TestInjector.java Wed Jul 21 18:05:56 2010
@@ -112,7 +112,7 @@ public class TestInjector extends Abstra
};
private List<String> readDb() throws Exception {
- List<URLWebPage> pages = readContents(webPageStore, null, fields);
+ List<URLWebPage> pages = CrawlTestUtil.readContents(webPageStore, null, fields);
ArrayList<String> read = new ArrayList<String>();
for (URLWebPage up : pages) {
WebPage page = up.getDatum();
Modified: nutch/branches/nutchbase/src/test/org/apache/nutch/fetcher/TestFetcher.java
URL: http://svn.apache.org/viewvc/nutch/branches/nutchbase/src/test/org/apache/nutch/fetcher/TestFetcher.java?rev=966337&r1=966336&r2=966337&view=diff
==============================================================================
--- nutch/branches/nutchbase/src/test/org/apache/nutch/fetcher/TestFetcher.java (original)
+++ nutch/branches/nutchbase/src/test/org/apache/nutch/fetcher/TestFetcher.java Wed Jul 21 18:05:56 2010
@@ -94,7 +94,7 @@ public class TestFetcher extends Abstrac
conf.getFloat("fetcher.server.delay", 5));
assertTrue(time > minimumTime);
- List<URLWebPage> pages = readContents(webPageStore, Mark.FETCH_MARK, null);
+ List<URLWebPage> pages = CrawlTestUtil.readContents(webPageStore, Mark.FETCH_MARK, (String[])null);
assertEquals(urls.size(), pages.size());
List<String> handledurls = new ArrayList<String>();
for (URLWebPage up : pages) {
Modified: nutch/branches/nutchbase/src/test/org/apache/nutch/util/AbstractNutchTest.java
URL: http://svn.apache.org/viewvc/nutch/branches/nutchbase/src/test/org/apache/nutch/util/AbstractNutchTest.java?rev=966337&r1=966336&r2=966337&view=diff
==============================================================================
--- nutch/branches/nutchbase/src/test/org/apache/nutch/util/AbstractNutchTest.java (original)
+++ nutch/branches/nutchbase/src/test/org/apache/nutch/util/AbstractNutchTest.java Wed Jul 21 18:05:56 2010
@@ -39,11 +39,8 @@ import org.gora.store.DataStoreFactory;
import org.gora.util.ByteUtils;
/**
- * Basic injector test: 1. Creates a text file with urls 2. Injects them into
- * crawldb 3. Reads crawldb entries and verifies contents 4. Injects more urls
- * into webdb 5. Reads crawldb entries and verifies contents
- *
- * @author nutch-dev <nutch-dev at lucene.apache.org>
+ * This class provides common routines for setup/teardown of an in-memory data
+ * store.
*/
public class AbstractNutchTest extends TestCase {
@@ -85,35 +82,4 @@ public class AbstractNutchTest extends T
fs.delete(testdir, true);
}
- /**
- * Read entries from a data store
- *
- * @return list of matching {@link URLWebPage} objects
- * @throws IOException
- */
- public static ArrayList<URLWebPage> readContents(DataStore<String,WebPage> store,
- Mark requiredMark, String... fields) throws IOException {
- ArrayList<URLWebPage> l = new ArrayList<URLWebPage>();
-
- Query<String, WebPage> query = store.newQuery();
- if (fields != null) {
- query.setFields(fields);
- }
-
- Result<String, WebPage> results = store.execute(query);
- while (results.next()) {
- WebPage page = results.get();
- String url = results.getKey();
-
- if (page == null)
- continue;
-
- if (requiredMark != null && requiredMark.checkMark(page) == null)
- continue;
-
- l.add(new URLWebPage(TableUtil.unreverseUrl(url), (WebPage)page.clone()));
- }
-
- return l;
- }
}
Modified: nutch/branches/nutchbase/src/test/org/apache/nutch/util/CrawlTestUtil.java
URL: http://svn.apache.org/viewvc/nutch/branches/nutchbase/src/test/org/apache/nutch/util/CrawlTestUtil.java?rev=966337&r1=966336&r2=966337&view=diff
==============================================================================
--- nutch/branches/nutchbase/src/test/org/apache/nutch/util/CrawlTestUtil.java (original)
+++ nutch/branches/nutchbase/src/test/org/apache/nutch/util/CrawlTestUtil.java Wed Jul 21 18:05:56 2010
@@ -18,6 +18,7 @@ package org.apache.nutch.util;
import java.io.IOException;
import java.net.UnknownHostException;
+import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
@@ -27,6 +28,12 @@ import org.apache.hadoop.conf.Configurat
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.nutch.crawl.URLWebPage;
+import org.apache.nutch.storage.Mark;
+import org.apache.nutch.storage.WebPage;
+import org.gora.query.Query;
+import org.gora.query.Result;
+import org.gora.store.DataStore;
import org.mortbay.jetty.Handler;
import org.mortbay.jetty.Server;
import org.mortbay.jetty.handler.DefaultHandler;
@@ -88,6 +95,39 @@ public class CrawlTestUtil {
out.flush();
out.close();
}
+
+ /**
+ * Read entries from a data store
+ *
+ * @return list of matching {@link URLWebPage} objects
+ * @throws IOException
+ */
+ public static ArrayList<URLWebPage> readContents(DataStore<String,WebPage> store,
+ Mark requiredMark, String... fields) throws IOException {
+ ArrayList<URLWebPage> l = new ArrayList<URLWebPage>();
+
+ Query<String, WebPage> query = store.newQuery();
+ if (fields != null) {
+ query.setFields(fields);
+ }
+
+ Result<String, WebPage> results = store.execute(query);
+ while (results.next()) {
+ WebPage page = results.get();
+ String url = results.getKey();
+
+ if (page == null)
+ continue;
+
+ if (requiredMark != null && requiredMark.checkMark(page) == null)
+ continue;
+
+ l.add(new URLWebPage(TableUtil.unreverseUrl(url), (WebPage)page.clone()));
+ }
+
+ return l;
+ }
+
/**
* Creates a new JettyServer with one static root context