You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2015/01/09 07:34:37 UTC
svn commit: r1650447 [24/25] - in /nutch/branches/2.x: ./
src/java/org/apache/nutch/api/ src/java/org/apache/nutch/api/impl/
src/java/org/apache/nutch/api/impl/db/
src/java/org/apache/nutch/api/model/response/
src/java/org/apache/nutch/api/resources/ s...
Modified: nutch/branches/2.x/src/test/org/apache/nutch/fetcher/TestFetcher.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/fetcher/TestFetcher.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/fetcher/TestFetcher.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/fetcher/TestFetcher.java Fri Jan 9 06:34:33 2015
@@ -38,32 +38,29 @@ import org.junit.Test;
import static org.junit.Assert.*;
/**
- * Basic fetcher test
- * 1. generate seedlist
- * 2. inject
- * 3. generate
- * 3. fetch
- * 4. Verify contents
- *
+ * Basic fetcher test 1. generate seedlist 2. inject 3. generate 4. fetch 5.
+ * Verify contents
+ *
*/
public class TestFetcher extends AbstractNutchTest {
- final static Path testdir=new Path("build/test/fetch-test");
+ final static Path testdir = new Path("build/test/fetch-test");
Path urlPath;
Server server;
@Override
@Before
- public void setUp() throws Exception{
+ public void setUp() throws Exception {
super.setUp();
urlPath = new Path(testdir, "urls");
- server = CrawlTestUtil.getServer(conf.getInt("content.server.port",50000), "build/test/data/fetch-test-site");
+ server = CrawlTestUtil.getServer(conf.getInt("content.server.port", 50000),
+ "build/test/data/fetch-test-site");
server.start();
}
@Override
@After
- public void tearDown() throws Exception{
+ public void tearDown() throws Exception {
server.stop();
fs.delete(testdir, true);
}
@@ -72,28 +69,28 @@ public class TestFetcher extends Abstrac
@Ignore("Temporarily diable until NUTCH-1572 is addressed.")
public void testFetch() throws Exception {
- //generate seedlist
+ // generate seedlist
ArrayList<String> urls = new ArrayList<String>();
- addUrl(urls,"index.html");
- addUrl(urls,"pagea.html");
- addUrl(urls,"pageb.html");
- addUrl(urls,"dup_of_pagea.html");
- addUrl(urls,"nested_spider_trap.html");
- addUrl(urls,"exception.html");
+ addUrl(urls, "index.html");
+ addUrl(urls, "pagea.html");
+ addUrl(urls, "pageb.html");
+ addUrl(urls, "dup_of_pagea.html");
+ addUrl(urls, "nested_spider_trap.html");
+ addUrl(urls, "exception.html");
CrawlTestUtil.generateSeedList(fs, urlPath, urls);
- //inject
+ // inject
InjectorJob injector = new InjectorJob(conf);
injector.inject(urlPath);
- //generate
+ // generate
long time = System.currentTimeMillis();
GeneratorJob g = new GeneratorJob(conf);
String batchId = g.generate(Long.MAX_VALUE, time, false, false);
- //fetch
+ // fetch
time = System.currentTimeMillis();
conf.setBoolean(FetcherJob.PARSE_KEY, true);
FetcherJob fetcher = new FetcherJob(conf);
@@ -101,12 +98,13 @@ public class TestFetcher extends Abstrac
time = System.currentTimeMillis() - time;
- //verify politeness, time taken should be more than (num_of_pages +1)*delay
- int minimumTime = (int) ((urls.size() + 1) * 1000 *
- conf.getFloat("fetcher.server.delay", 5));
+ // verify politeness, time taken should be more than (num_of_pages +1)*delay
+ int minimumTime = (int) ((urls.size() + 1) * 1000 * conf.getFloat(
+ "fetcher.server.delay", 5));
assertTrue(time > minimumTime);
- List<URLWebPage> pages = CrawlTestUtil.readContents(webPageStore, Mark.FETCH_MARK, (String[])null);
+ List<URLWebPage> pages = CrawlTestUtil.readContents(webPageStore,
+ Mark.FETCH_MARK, (String[]) null);
assertEquals(urls.size(), pages.size());
List<String> handledurls = new ArrayList<String>();
for (URLWebPage up : pages) {
@@ -115,23 +113,24 @@ public class TestFetcher extends Abstrac
continue;
}
String content = Bytes.toString(bb);
- if (content.indexOf("Nutch fetcher test page")!=-1) {
+ if (content.indexOf("Nutch fetcher test page") != -1) {
handledurls.add(up.getUrl());
}
}
Collections.sort(urls);
Collections.sort(handledurls);
- //verify that enough pages were handled
+ // verify that enough pages were handled
assertEquals(urls.size(), handledurls.size());
- //verify that correct pages were handled
+ // verify that correct pages were handled
assertTrue(handledurls.containsAll(urls));
assertTrue(urls.containsAll(handledurls));
}
private void addUrl(ArrayList<String> urls, String page) {
- urls.add("http://127.0.0.1:" + server.getConnectors()[0].getPort() + "/" + page);
+ urls.add("http://127.0.0.1:" + server.getConnectors()[0].getPort() + "/"
+ + page);
}
@Test
Modified: nutch/branches/2.x/src/test/org/apache/nutch/indexer/TestIndexingFilters.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/indexer/TestIndexingFilters.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/indexer/TestIndexingFilters.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/indexer/TestIndexingFilters.java Fri Jan 9 06:34:33 2015
@@ -30,6 +30,7 @@ public class TestIndexingFilters {
/**
* Test behaviour when defined filter does not exist.
+ *
* @throws IndexingException
*/
@Test
@@ -46,15 +47,16 @@ public class TestIndexingFilters {
WebPage page = WebPage.newBuilder().build();
page.setText(new Utf8("text"));
page.setTitle(new Utf8("title"));
- filters.filter(new NutchDocument(),"http://www.example.com/",page);
+ filters.filter(new NutchDocument(), "http://www.example.com/", page);
}
/**
* Test behaviour when NutchDOcument is null
+ *
* @throws IndexingException
*/
@Test
- public void testNutchDocumentNullIndexingFilter() throws IndexingException{
+ public void testNutchDocumentNullIndexingFilter() throws IndexingException {
Configuration conf = NutchConfiguration.create();
conf.addResource("nutch-default.xml");
conf.addResource("crawl-tests.xml");
@@ -63,18 +65,18 @@ public class TestIndexingFilters {
WebPage page = WebPage.newBuilder().build();
page.setText(new Utf8("text"));
page.setTitle(new Utf8("title"));
- NutchDocument doc = filters.filter(null,"http://www.example.com/",page);
+ NutchDocument doc = filters.filter(null, "http://www.example.com/", page);
assertNull(doc);
}
/**
* Test behaviour when reset the index filter order will not take effect
- *
+ *
* @throws IndexingException
*/
@Test
- public void testFilterCacheIndexingFilter() throws IndexingException{
+ public void testFilterCacheIndexingFilter() throws IndexingException {
Configuration conf = NutchConfiguration.create();
conf.addResource("nutch-default.xml");
conf.addResource("crawl-tests.xml");
@@ -86,18 +88,20 @@ public class TestIndexingFilters {
WebPage page = WebPage.newBuilder().build();
page.setText(new Utf8("text"));
page.setTitle(new Utf8("title"));
- NutchDocument fdoc1 = filters1.filter(new NutchDocument(),"http://www.example.com/",page);
+ NutchDocument fdoc1 = filters1.filter(new NutchDocument(),
+ "http://www.example.com/", page);
// add another index filter
String class2 = "org.apache.nutch.indexer.metadata.MetadataIndexer";
// set content metadata
Metadata md = new Metadata();
- md.add("example","data");
+ md.add("example", "data");
// add MetadataIndxer filter
conf.set(IndexingFilters.INDEXINGFILTER_ORDER, class1 + " " + class2);
IndexingFilters filters2 = new IndexingFilters(conf);
- NutchDocument fdoc2 = filters2.filter(new NutchDocument(),"http://www.example.com/",page);
- assertEquals(fdoc1.getFieldNames().size(),fdoc2.getFieldNames().size());
+ NutchDocument fdoc2 = filters2.filter(new NutchDocument(),
+ "http://www.example.com/", page);
+ assertEquals(fdoc1.getFieldNames().size(), fdoc2.getFieldNames().size());
}
}
Modified: nutch/branches/2.x/src/test/org/apache/nutch/metadata/TestMetadata.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/metadata/TestMetadata.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/metadata/TestMetadata.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/metadata/TestMetadata.java Fri Jan 9 06:34:33 2015
@@ -278,4 +278,3 @@ public class TestMetadata {
}
}
-
Modified: nutch/branches/2.x/src/test/org/apache/nutch/metadata/TestSpellCheckedMetadata.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/metadata/TestSpellCheckedMetadata.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/metadata/TestSpellCheckedMetadata.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/metadata/TestSpellCheckedMetadata.java Fri Jan 9 06:34:33 2015
@@ -28,7 +28,7 @@ import static org.junit.Assert.*;
/**
* JUnit based tests of class
* {@link org.apache.nutch.metadata.SpellCheckedMetadata}.
- *
+ *
* @author Chris Mattmann
* @author Jérôme Charron
*/
@@ -40,20 +40,20 @@ public class TestSpellCheckedMetadata {
/** Test for the <code>getNormalizedName(String)</code> method. */
@Test
public void testGetNormalizedName() {
- assertEquals("Content-Type", SpellCheckedMetadata
- .getNormalizedName("Content-Type"));
- assertEquals("Content-Type", SpellCheckedMetadata
- .getNormalizedName("ContentType"));
- assertEquals("Content-Type", SpellCheckedMetadata
- .getNormalizedName("Content-type"));
- assertEquals("Content-Type", SpellCheckedMetadata
- .getNormalizedName("contenttype"));
- assertEquals("Content-Type", SpellCheckedMetadata
- .getNormalizedName("contentype"));
- assertEquals("Content-Type", SpellCheckedMetadata
- .getNormalizedName("contntype"));
+ assertEquals("Content-Type",
+ SpellCheckedMetadata.getNormalizedName("Content-Type"));
+ assertEquals("Content-Type",
+ SpellCheckedMetadata.getNormalizedName("ContentType"));
+ assertEquals("Content-Type",
+ SpellCheckedMetadata.getNormalizedName("Content-type"));
+ assertEquals("Content-Type",
+ SpellCheckedMetadata.getNormalizedName("contenttype"));
+ assertEquals("Content-Type",
+ SpellCheckedMetadata.getNormalizedName("contentype"));
+ assertEquals("Content-Type",
+ SpellCheckedMetadata.getNormalizedName("contntype"));
}
-
+
/** Test for the <code>add(String, String)</code> method. */
@Test
public void testAdd() {
@@ -253,8 +253,8 @@ public class TestSpellCheckedMetadata {
}
/**
- * IO Test method, usable only when you plan to do changes in metadata
- * to measure relative performance impact.
+ * IO Test method, usable only when you plan to do changes in metadata to
+ * measure relative performance impact.
*/
@Test
public final void testHandlingSpeed() {
Modified: nutch/branches/2.x/src/test/org/apache/nutch/net/TestURLFilters.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/net/TestURLFilters.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/net/TestURLFilters.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/net/TestURLFilters.java Fri Jan 9 06:34:33 2015
@@ -26,6 +26,7 @@ public class TestURLFilters {
/**
* Testcase for NUTCH-325.
+ *
* @throws URLFilterException
*/
@Test
Modified: nutch/branches/2.x/src/test/org/apache/nutch/net/TestURLNormalizers.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/net/TestURLNormalizers.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/net/TestURLNormalizers.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/net/TestURLNormalizers.java Fri Jan 9 06:34:33 2015
@@ -32,30 +32,38 @@ public class TestURLNormalizers {
String clazz1 = "org.apache.nutch.net.urlnormalizer.regex.RegexURLNormalizer";
String clazz2 = "org.apache.nutch.net.urlnormalizer.basic.BasicURLNormalizer";
conf.set("urlnormalizer.order", clazz1 + " " + clazz2);
-
- URLNormalizers normalizers = new URLNormalizers(conf, URLNormalizers.SCOPE_DEFAULT);
-
+
+ URLNormalizers normalizers = new URLNormalizers(conf,
+ URLNormalizers.SCOPE_DEFAULT);
+
assertNotNull(normalizers);
try {
- normalizers.normalize("http://www.example.com/", URLNormalizers.SCOPE_DEFAULT);
+ normalizers.normalize("http://www.example.com/",
+ URLNormalizers.SCOPE_DEFAULT);
} catch (MalformedURLException mue) {
fail(mue.toString());
}
// NUTCH-1011 - Get rid of superfluous slashes
try {
- String normalizedSlashes = normalizers.normalize("http://www.example.org//path/to//somewhere.html", URLNormalizers.SCOPE_DEFAULT);
- assertEquals(normalizedSlashes, "http://www.example.org/path/to/somewhere.html");
+ String normalizedSlashes = normalizers.normalize(
+ "http://www.example.org//path/to//somewhere.html",
+ URLNormalizers.SCOPE_DEFAULT);
+ assertEquals(normalizedSlashes,
+ "http://www.example.org/path/to/somewhere.html");
} catch (MalformedURLException mue) {
fail(mue.toString());
}
// check the order
int pos1 = -1, pos2 = -1;
- URLNormalizer[] impls = normalizers.getURLNormalizers(URLNormalizers.SCOPE_DEFAULT);
+ URLNormalizer[] impls = normalizers
+ .getURLNormalizers(URLNormalizers.SCOPE_DEFAULT);
for (int i = 0; i < impls.length; i++) {
- if (impls[i].getClass().getName().equals(clazz1)) pos1 = i;
- if (impls[i].getClass().getName().equals(clazz2)) pos2 = i;
+ if (impls[i].getClass().getName().equals(clazz1))
+ pos1 = i;
+ if (impls[i].getClass().getName().equals(clazz2))
+ pos2 = i;
}
if (pos1 != -1 && pos2 != -1) {
assertTrue("RegexURLNormalizer before BasicURLNormalizer", pos1 < pos2);
Modified: nutch/branches/2.x/src/test/org/apache/nutch/parse/TestOutlinkExtractor.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/parse/TestOutlinkExtractor.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/parse/TestOutlinkExtractor.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/parse/TestOutlinkExtractor.java Fri Jan 9 06:34:33 2015
@@ -14,7 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
+
package org.apache.nutch.parse;
import org.apache.nutch.parse.Outlink;
@@ -35,51 +35,60 @@ import static org.junit.Assert.*;
public class TestOutlinkExtractor {
private static Configuration conf = NutchConfiguration.create();
+
public void testGetNoOutlinks() {
- Outlink[] outlinks = null;
-
+ Outlink[] outlinks = null;
+
outlinks = OutlinkExtractor.getOutlinks(null, conf);
assertNotNull(outlinks);
assertEquals(0, outlinks.length);
-
+
outlinks = OutlinkExtractor.getOutlinks("", conf);
assertNotNull(outlinks);
assertEquals(0, outlinks.length);
}
-
+
@Test
public void testGetOutlinksHttp() {
- Outlink[] outlinks = OutlinkExtractor.getOutlinks(
- "Test with http://www.nutch.org/index.html is it found? " +
- "What about www.google.com at http://www.google.de " +
- "A longer URL could be http://www.sybit.com/solutions/portals.html", conf);
-
+ Outlink[] outlinks = OutlinkExtractor
+ .getOutlinks(
+ "Test with http://www.nutch.org/index.html is it found? "
+ + "What about www.google.com at http://www.google.de "
+ + "A longer URL could be http://www.sybit.com/solutions/portals.html",
+ conf);
+
assertTrue("Url not found!", outlinks.length == 3);
- assertEquals("Wrong URL", "http://www.nutch.org/index.html", outlinks[0].getToUrl());
+ assertEquals("Wrong URL", "http://www.nutch.org/index.html",
+ outlinks[0].getToUrl());
assertEquals("Wrong URL", "http://www.google.de", outlinks[1].getToUrl());
- assertEquals("Wrong URL", "http://www.sybit.com/solutions/portals.html", outlinks[2].getToUrl());
+ assertEquals("Wrong URL", "http://www.sybit.com/solutions/portals.html",
+ outlinks[2].getToUrl());
}
-
+
@Test
public void testGetOutlinksHttp2() {
- Outlink[] outlinks = OutlinkExtractor.getOutlinks(
- "Test with http://www.nutch.org/index.html is it found? " +
- "What about www.google.com at http://www.google.de " +
- "A longer URL could be http://www.sybit.com/solutions/portals.html", "http://www.sybit.de", conf);
-
+ Outlink[] outlinks = OutlinkExtractor
+ .getOutlinks(
+ "Test with http://www.nutch.org/index.html is it found? "
+ + "What about www.google.com at http://www.google.de "
+ + "A longer URL could be http://www.sybit.com/solutions/portals.html",
+ "http://www.sybit.de", conf);
+
assertTrue("Url not found!", outlinks.length == 3);
- assertEquals("Wrong URL", "http://www.nutch.org/index.html", outlinks[0].getToUrl());
+ assertEquals("Wrong URL", "http://www.nutch.org/index.html",
+ outlinks[0].getToUrl());
assertEquals("Wrong URL", "http://www.google.de", outlinks[1].getToUrl());
- assertEquals("Wrong URL", "http://www.sybit.com/solutions/portals.html", outlinks[2].getToUrl());
+ assertEquals("Wrong URL", "http://www.sybit.com/solutions/portals.html",
+ outlinks[2].getToUrl());
}
-
+
@Test
public void testGetOutlinksFtp() {
Outlink[] outlinks = OutlinkExtractor.getOutlinks(
- "Test with ftp://www.nutch.org is it found? " +
- "What about www.google.com at ftp://www.google.de", conf);
-
- assertTrue("Url not found!", outlinks.length >1);
+ "Test with ftp://www.nutch.org is it found? "
+ + "What about www.google.com at ftp://www.google.de", conf);
+
+ assertTrue("Url not found!", outlinks.length > 1);
assertEquals("Wrong URL", "ftp://www.nutch.org", outlinks[0].getToUrl());
assertEquals("Wrong URL", "ftp://www.google.de", outlinks[1].getToUrl());
}
Modified: nutch/branches/2.x/src/test/org/apache/nutch/parse/TestParserFactory.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/parse/TestParserFactory.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/parse/TestParserFactory.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/parse/TestParserFactory.java Fri Jan 9 06:34:33 2015
@@ -28,24 +28,24 @@ import org.apache.nutch.util.NutchConfig
/**
* Unit test for new parse plugin selection.
- *
+ *
* @author Sebastien Le Callonnec
*/
public class TestParserFactory {
-
+
private Configuration conf;
private ParserFactory parserFactory;
-
+
/** Inits the Test Case with the test parse-plugin file */
@Before
public void setUp() throws Exception {
- conf = NutchConfiguration.create();
- conf.set("plugin.includes", ".*");
- conf.set("parse.plugin.file",
- "org/apache/nutch/parse/parse-plugin-test.xml");
- parserFactory = new ParserFactory(conf);
+ conf = NutchConfiguration.create();
+ conf.set("plugin.includes", ".*");
+ conf.set("parse.plugin.file",
+ "org/apache/nutch/parse/parse-plugin-test.xml");
+ parserFactory = new ParserFactory(conf);
}
-
+
/** Unit test for <code>getExtensions(String)</code> method. */
@Test
public void testGetExtensions() throws Exception {
@@ -56,48 +56,49 @@ public class TestParserFactory {
ext = parserFactory.getExtensions("foo/bar").get(0);
assertEquals("parse-tika", ext.getDescriptor().getPluginId());
}
-
+
/** Unit test to check <code>getParsers</code> method */
@Test
public void testGetParsers() throws Exception {
- Parser [] parsers = parserFactory.getParsers("text/html", "http://foo.com");
+ Parser[] parsers = parserFactory.getParsers("text/html", "http://foo.com");
assertNotNull(parsers);
assertEquals(1, parsers.length);
- assertEquals("org.apache.nutch.parse.tika.TikaParser",
- parsers[0].getClass().getName());
+ assertEquals("org.apache.nutch.parse.tika.TikaParser", parsers[0]
+ .getClass().getName());
parsers = parserFactory.getParsers("text/html; charset=ISO-8859-1",
- "http://foo.com");
+ "http://foo.com");
assertNotNull(parsers);
assertEquals(1, parsers.length);
- assertEquals("org.apache.nutch.parse.tika.TikaParser",
- parsers[0].getClass().getName());
-
+ assertEquals("org.apache.nutch.parse.tika.TikaParser", parsers[0]
+ .getClass().getName());
+
parsers = parserFactory.getParsers("application/x-javascript",
- "http://foo.com");
+ "http://foo.com");
assertNotNull(parsers);
assertEquals(1, parsers.length);
- assertEquals("org.apache.nutch.parse.js.JSParseFilter",
- parsers[0].getClass().getName());
-
+ assertEquals("org.apache.nutch.parse.js.JSParseFilter", parsers[0]
+ .getClass().getName());
+
parsers = parserFactory.getParsers("text/plain", "http://foo.com");
assertNotNull(parsers);
assertEquals(1, parsers.length);
- assertEquals("org.apache.nutch.parse.tika.TikaParser",
- parsers[0].getClass().getName());
-
+ assertEquals("org.apache.nutch.parse.tika.TikaParser", parsers[0]
+ .getClass().getName());
+
Parser parser1 = parserFactory.getParsers("text/plain", "http://foo.com")[0];
Parser parser2 = parserFactory.getParsers("*", "http://foo.com")[0];
-
+
assertEquals("Different instances!", parser1.hashCode(), parser2.hashCode());
-
- //test and make sure that the rss parser is loaded even though its plugin.xml
- //doesn't claim to support text/rss, only application/rss+xml
- parsers = parserFactory.getParsers("text/rss","http://foo.com");
+
+ // test and make sure that the rss parser is loaded even though its
+ // plugin.xml
+ // doesn't claim to support text/rss, only application/rss+xml
+ parsers = parserFactory.getParsers("text/rss", "http://foo.com");
assertNotNull(parsers);
- assertEquals(1,parsers.length);
- assertEquals("org.apache.nutch.parse.tika.TikaParser",
- parsers[0].getClass().getName());
+ assertEquals(1, parsers.length);
+ assertEquals("org.apache.nutch.parse.tika.TikaParser", parsers[0]
+ .getClass().getName());
}
-
+
}
Modified: nutch/branches/2.x/src/test/org/apache/nutch/plugin/HelloWorldExtension.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/plugin/HelloWorldExtension.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/plugin/HelloWorldExtension.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/plugin/HelloWorldExtension.java Fri Jan 9 06:34:33 2015
@@ -24,8 +24,11 @@ package org.apache.nutch.plugin;
*/
public class HelloWorldExtension implements ITestExtension {
- /* (non-Javadoc)
- * @see org.apache.nutch.plugin.ITestExtension#testGetExtension(java.lang.String)
+ /*
+ * (non-Javadoc)
+ *
+ * @see
+ * org.apache.nutch.plugin.ITestExtension#testGetExtension(java.lang.String)
*/
public String testGetExtension(String hello) {
return hello + " World";
Modified: nutch/branches/2.x/src/test/org/apache/nutch/plugin/ITestExtension.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/plugin/ITestExtension.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/plugin/ITestExtension.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/plugin/ITestExtension.java Fri Jan 9 06:34:33 2015
@@ -15,11 +15,12 @@
* limitations under the License.
*/
package org.apache.nutch.plugin;
+
/**
* A Simple Test Extension Interface.
*
* @author joa23
- *
+ *
*/
public interface ITestExtension {
public String testGetExtension(String hello);
Modified: nutch/branches/2.x/src/test/org/apache/nutch/plugin/SimpleTestPlugin.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/plugin/SimpleTestPlugin.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/plugin/SimpleTestPlugin.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/plugin/SimpleTestPlugin.java Fri Jan 9 06:34:33 2015
@@ -28,8 +28,8 @@ import org.apache.hadoop.conf.Configurat
public class SimpleTestPlugin extends Plugin {
/**
- * @param pDescriptor
- * @param conf
+ * @param pDescriptor
+ * @param conf
*/
public SimpleTestPlugin(PluginDescriptor pDescriptor, Configuration conf) {
@@ -55,4 +55,3 @@ public class SimpleTestPlugin extends Pl
}
}
-
Modified: nutch/branches/2.x/src/test/org/apache/nutch/plugin/TestPluginSystem.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/plugin/TestPluginSystem.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/plugin/TestPluginSystem.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/plugin/TestPluginSystem.java Fri Jan 9 06:34:33 2015
@@ -42,266 +42,260 @@ import org.apache.nutch.util.NutchJobCon
* @author joa23
*/
public class TestPluginSystem {
- private int fPluginCount;
+ private int fPluginCount;
- private LinkedList<File> fFolders = new LinkedList<File>();
- private Configuration conf ;
- private PluginRepository repository;
-
- @Before
- public void setUp() throws Exception {
- this.conf = NutchConfiguration.create();
- conf.set("plugin.includes", ".*");
-// String string = this.conf.get("plugin.includes", "");
-// conf.set("plugin.includes", string + "|Dummy*");
- fPluginCount = 5;
- createDummyPlugins(fPluginCount);
- this.repository = PluginRepository.get(conf);
- }
-
- @After
- public void tearDown() throws Exception {
- for (int i = 0; i < fFolders.size(); i++) {
- File folder = (File) fFolders.get(i);
- delete(folder);
- folder.delete();
- }
-
- }
-
- /**
- */
- @Test
- public void testPluginConfiguration() {
- String string = getPluginFolder();
- File file = new File(string);
- if (!file.exists()) {
- file.mkdir();
- }
- assertTrue(file.exists());
- }
-
- /**
- */
- @Test
- public void testLoadPlugins() {
- PluginDescriptor[] descriptors = repository
- .getPluginDescriptors();
- int k = descriptors.length;
- assertTrue(fPluginCount <= k);
- for (int i = 0; i < descriptors.length; i++) {
- PluginDescriptor descriptor = descriptors[i];
- if (!descriptor.getPluginId().startsWith("getPluginFolder()")) {
- continue;
- }
- assertEquals(1, descriptor.getExportedLibUrls().length);
- assertEquals(1, descriptor.getNotExportedLibUrls().length);
- }
- }
-
- @Test
- public void testRepositoryCache() {
- Configuration config = NutchConfiguration.create();
- PluginRepository repo = PluginRepository.get(config);
- JobConf job = new NutchJobConf(config);
- PluginRepository repo1 = PluginRepository.get(job);
- assertTrue(repo == repo1);
- // now construct a config without UUID
- config = new Configuration();
- config.addResource("nutch-default.xml");
- config.addResource("nutch-site.xml");
- repo = PluginRepository.get(config);
- job = new NutchJobConf(config);
- repo1 = PluginRepository.get(job);
- assertTrue(repo1 != repo);
- }
+ private LinkedList<File> fFolders = new LinkedList<File>();
+ private Configuration conf;
+ private PluginRepository repository;
+
+ @Before
+ public void setUp() throws Exception {
+ this.conf = NutchConfiguration.create();
+ conf.set("plugin.includes", ".*");
+ // String string = this.conf.get("plugin.includes", "");
+ // conf.set("plugin.includes", string + "|Dummy*");
+ fPluginCount = 5;
+ createDummyPlugins(fPluginCount);
+ this.repository = PluginRepository.get(conf);
+ }
+
+ @After
+ public void tearDown() throws Exception {
+ for (int i = 0; i < fFolders.size(); i++) {
+ File folder = (File) fFolders.get(i);
+ delete(folder);
+ folder.delete();
+ }
+
+ }
+
+ /**
+ */
+ @Test
+ public void testPluginConfiguration() {
+ String string = getPluginFolder();
+ File file = new File(string);
+ if (!file.exists()) {
+ file.mkdir();
+ }
+ assertTrue(file.exists());
+ }
+
+ /**
+ */
+ @Test
+ public void testLoadPlugins() {
+ PluginDescriptor[] descriptors = repository.getPluginDescriptors();
+ int k = descriptors.length;
+ assertTrue(fPluginCount <= k);
+ for (int i = 0; i < descriptors.length; i++) {
+ PluginDescriptor descriptor = descriptors[i];
+ if (!descriptor.getPluginId().startsWith("getPluginFolder()")) {
+ continue;
+ }
+ assertEquals(1, descriptor.getExportedLibUrls().length);
+ assertEquals(1, descriptor.getNotExportedLibUrls().length);
+ }
+ }
+
+ @Test
+ public void testRepositoryCache() {
+ Configuration config = NutchConfiguration.create();
+ PluginRepository repo = PluginRepository.get(config);
+ JobConf job = new NutchJobConf(config);
+ PluginRepository repo1 = PluginRepository.get(job);
+ assertTrue(repo == repo1);
+ // now construct a config without UUID
+ config = new Configuration();
+ config.addResource("nutch-default.xml");
+ config.addResource("nutch-site.xml");
+ repo = PluginRepository.get(config);
+ job = new NutchJobConf(config);
+ repo1 = PluginRepository.get(job);
+ assertTrue(repo1 != repo);
+ }
- /**
+ /**
*
*/
- @Test
- public void testGetExtensionAndAttributes() {
- String xpId = " sdsdsd";
- ExtensionPoint extensionPoint =repository
- .getExtensionPoint(xpId);
- assertEquals(extensionPoint, null);
- Extension[] extension1 = repository
- .getExtensionPoint(getGetExtensionId()).getExtensions();
- assertEquals(extension1.length, fPluginCount);
- for (int i = 0; i < extension1.length; i++) {
- Extension extension2 = extension1[i];
- String string = extension2.getAttribute(getGetConfigElementName());
- assertEquals(string, getParameterValue());
- }
+ @Test
+ public void testGetExtensionAndAttributes() {
+ String xpId = " sdsdsd";
+ ExtensionPoint extensionPoint = repository.getExtensionPoint(xpId);
+ assertEquals(extensionPoint, null);
+ Extension[] extension1 = repository.getExtensionPoint(getGetExtensionId())
+ .getExtensions();
+ assertEquals(extension1.length, fPluginCount);
+ for (int i = 0; i < extension1.length; i++) {
+ Extension extension2 = extension1[i];
+ String string = extension2.getAttribute(getGetConfigElementName());
+ assertEquals(string, getParameterValue());
+ }
+ }
+
+ /**
+ * @throws PluginRuntimeException
+ */
+ @Test
+ public void testGetExtensionInstances() throws PluginRuntimeException {
+ Extension[] extensions = repository.getExtensionPoint(getGetExtensionId())
+ .getExtensions();
+ assertEquals(extensions.length, fPluginCount);
+ for (int i = 0; i < extensions.length; i++) {
+ Extension extension = extensions[i];
+ Object object = extension.getExtensionInstance();
+ if (!(object instanceof HelloWorldExtension))
+ fail(" object is not a instance of HelloWorldExtension");
+ ((ITestExtension) object).testGetExtension("Bla ");
+ String string = ((ITestExtension) object).testGetExtension("Hello");
+ assertEquals("Hello World", string);
}
+ }
- /**
- * @throws PluginRuntimeException
- */
- @Test
- public void testGetExtensionInstances() throws PluginRuntimeException {
- Extension[] extensions = repository
- .getExtensionPoint(getGetExtensionId()).getExtensions();
- assertEquals(extensions.length, fPluginCount);
- for (int i = 0; i < extensions.length; i++) {
- Extension extension = extensions[i];
- Object object = extension.getExtensionInstance();
- if (!(object instanceof HelloWorldExtension))
- fail(" object is not a instance of HelloWorldExtension");
- ((ITestExtension) object).testGetExtension("Bla ");
- String string = ((ITestExtension) object).testGetExtension("Hello");
- assertEquals("Hello World", string);
- }
- }
-
- /**
+ /**
*
*
*/
- @Test
- public void testGetClassLoader() {
- PluginDescriptor[] descriptors = repository
- .getPluginDescriptors();
- for (int i = 0; i < descriptors.length; i++) {
- PluginDescriptor descriptor = descriptors[i];
- assertNotNull(descriptor.getClassLoader());
- }
- }
-
- /**
- * @throws IOException
- */
- @Test
- public void testGetResources() throws IOException {
- PluginDescriptor[] descriptors = repository
- .getPluginDescriptors();
- for (int i = 0; i < descriptors.length; i++) {
- PluginDescriptor descriptor = descriptors[i];
- if (!descriptor.getPluginId().startsWith("getPluginFolder()")) {
- continue;
- }
- String value = descriptor.getResourceString("key", Locale.UK);
- assertEquals("value", value);
- value = descriptor.getResourceString("key",
- Locale.TRADITIONAL_CHINESE);
- assertEquals("value", value);
-
- }
- }
-
- /**
- * @return a PluginFolderPath
- */
- private String getPluginFolder() {
- String[] strings = conf.getStrings("plugin.folders");
- if (strings == null || strings.length == 0)
- fail("no plugin directory setuped..");
-
- String name = strings[0];
- return new PluginManifestParser(conf, this.repository).getPluginFolder(name).toString();
- }
-
- /**
- * Creates some Dummy Plugins
- *
- * @param pCount
- */
- private void createDummyPlugins(int pCount) {
- String string = getPluginFolder();
- try {
- File folder = new File(string);
- folder.mkdir();
- for (int i = 0; i < pCount; i++) {
- String pluginFolder = string + File.separator + "DummyPlugin"
- + i;
- File file = new File(pluginFolder);
- file.mkdir();
- fFolders.add(file);
- createPluginManifest(i, file.getAbsolutePath());
- createResourceFile(file.getAbsolutePath());
- }
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
-
- /**
- * Creates an ResourceFile
- *
- * @param pFolderPath
- * @throws FileNotFoundException
- * @throws IOException
- */
- private void createResourceFile(String pFolderPath)
- throws FileNotFoundException, IOException {
- Properties properties = new Properties();
- properties.setProperty("key", "value");
- properties.store(new FileOutputStream(pFolderPath + File.separator
- + "messages" + ".properties"), "");
- }
-
- /**
- * Deletes files in path
- *
- * @param path
- * @throws IOException
- */
- private void delete(File path) throws IOException {
- File[] files = path.listFiles();
- for (int i = 0; i < files.length; ++i) {
- if (files[i].isDirectory())
- delete(files[i]);
- files[i].delete();
- }
- }
-
- /**
- * Creates an Plugin Manifest File
- *
- * @param i
- * @param pFolderPath
- * @throws IOException
- */
- private void createPluginManifest(int i, String pFolderPath)
- throws IOException {
- FileWriter out = new FileWriter(pFolderPath + File.separator
- + "plugin.xml");
- String xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
- + "<!--this is just a simple plugin for testing issues.-->"
- + "<plugin id=\"org.apache.nutch.plugin."
- + i
- + "\" name=\""
- + i
- + "\" version=\"1.0\" provider-name=\"joa23\" "
- + "class=\"org.apache.nutch.plugin.SimpleTestPlugin\">"
- + "<extension-point id=\"aExtensioID\" "
- + "name=\"simple Parser Extension\" "
- + "schema=\"schema/testExtensionPoint.exsd\"/>"
- + "<runtime><library name=\"libs/exported.jar\"><extport/></library>"
- + "<library name=\"libs/not_exported.jar\"/></runtime>"
- + "<extension point=\"aExtensioID\">"
- + "<implementation name=\"simple Parser Extension\" "
- + "id=\"aExtensionId.\" class=\"org.apache.nutch.plugin.HelloWorldExtension\">"
- + "<parameter name=\"dummy-name\" value=\"a simple param value\"/>"
- + "</implementation></extension></plugin>";
- out.write(xml);
- out.flush();
- out.close();
- }
-
- private String getParameterValue() {
- return "a simple param value";
- }
-
- private static String getGetExtensionId() {
- return "aExtensioID";
- }
-
- private static String getGetConfigElementName() {
- return "dummy-name";
- }
-
- public static void main(String[] args) throws IOException {
- new TestPluginSystem().createPluginManifest(1, "/");
- }
+ @Test
+ public void testGetClassLoader() {
+ PluginDescriptor[] descriptors = repository.getPluginDescriptors();
+ for (int i = 0; i < descriptors.length; i++) {
+ PluginDescriptor descriptor = descriptors[i];
+ assertNotNull(descriptor.getClassLoader());
+ }
+ }
+
+ /**
+ * @throws IOException
+ */
+ @Test
+ public void testGetResources() throws IOException {
+ PluginDescriptor[] descriptors = repository.getPluginDescriptors();
+ for (int i = 0; i < descriptors.length; i++) {
+ PluginDescriptor descriptor = descriptors[i];
+ if (!descriptor.getPluginId().startsWith("getPluginFolder()")) {
+ continue;
+ }
+ String value = descriptor.getResourceString("key", Locale.UK);
+ assertEquals("value", value);
+ value = descriptor.getResourceString("key", Locale.TRADITIONAL_CHINESE);
+ assertEquals("value", value);
+
+ }
+ }
+
+ /**
+ * @return a PluginFolderPath
+ */
+ private String getPluginFolder() {
+ String[] strings = conf.getStrings("plugin.folders");
+ if (strings == null || strings.length == 0)
+ fail("no plugin directory setuped..");
+
+ String name = strings[0];
+ return new PluginManifestParser(conf, this.repository)
+ .getPluginFolder(name).toString();
+ }
+
+ /**
+ * Creates some Dummy Plugins
+ *
+ * @param pCount
+ */
+ private void createDummyPlugins(int pCount) {
+ String string = getPluginFolder();
+ try {
+ File folder = new File(string);
+ folder.mkdir();
+ for (int i = 0; i < pCount; i++) {
+ String pluginFolder = string + File.separator + "DummyPlugin" + i;
+ File file = new File(pluginFolder);
+ file.mkdir();
+ fFolders.add(file);
+ createPluginManifest(i, file.getAbsolutePath());
+ createResourceFile(file.getAbsolutePath());
+ }
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+
+ /**
+ * Creates an ResourceFile
+ *
+ * @param pFolderPath
+ * @throws FileNotFoundException
+ * @throws IOException
+ */
+ private void createResourceFile(String pFolderPath)
+ throws FileNotFoundException, IOException {
+ Properties properties = new Properties();
+ properties.setProperty("key", "value");
+ properties.store(new FileOutputStream(pFolderPath + File.separator
+ + "messages" + ".properties"), "");
+ }
+
+ /**
+ * Deletes files in path
+ *
+ * @param path
+ * @throws IOException
+ */
+ private void delete(File path) throws IOException {
+ File[] files = path.listFiles();
+ for (int i = 0; i < files.length; ++i) {
+ if (files[i].isDirectory())
+ delete(files[i]);
+ files[i].delete();
+ }
+ }
+
+ /**
+ * Creates an Plugin Manifest File
+ *
+ * @param i
+ * @param pFolderPath
+ * @throws IOException
+ */
+ private void createPluginManifest(int i, String pFolderPath)
+ throws IOException {
+ FileWriter out = new FileWriter(pFolderPath + File.separator + "plugin.xml");
+ String xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
+ + "<!--this is just a simple plugin for testing issues.-->"
+ + "<plugin id=\"org.apache.nutch.plugin."
+ + i
+ + "\" name=\""
+ + i
+ + "\" version=\"1.0\" provider-name=\"joa23\" "
+ + "class=\"org.apache.nutch.plugin.SimpleTestPlugin\">"
+ + "<extension-point id=\"aExtensioID\" "
+ + "name=\"simple Parser Extension\" "
+ + "schema=\"schema/testExtensionPoint.exsd\"/>"
+ + "<runtime><library name=\"libs/exported.jar\"><extport/></library>"
+ + "<library name=\"libs/not_exported.jar\"/></runtime>"
+ + "<extension point=\"aExtensioID\">"
+ + "<implementation name=\"simple Parser Extension\" "
+ + "id=\"aExtensionId.\" class=\"org.apache.nutch.plugin.HelloWorldExtension\">"
+ + "<parameter name=\"dummy-name\" value=\"a simple param value\"/>"
+ + "</implementation></extension></plugin>";
+ out.write(xml);
+ out.flush();
+ out.close();
+ }
+
+ private String getParameterValue() {
+ return "a simple param value";
+ }
+
+ private static String getGetExtensionId() {
+ return "aExtensioID";
+ }
+
+ private static String getGetConfigElementName() {
+ return "dummy-name";
+ }
+
+ public static void main(String[] args) throws IOException {
+ new TestPluginSystem().createPluginManifest(1, "/");
+ }
}
Modified: nutch/branches/2.x/src/test/org/apache/nutch/protocol/TestContent.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/protocol/TestContent.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/protocol/TestContent.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/protocol/TestContent.java Fri Jan 9 06:34:33 2015
@@ -27,7 +27,6 @@ import org.apache.tika.mime.MimeTypes;
import org.junit.Test;
import static org.junit.Assert.*;
-
/** Unit tests for Content. */
public class TestContent {
@@ -46,7 +45,7 @@ public class TestContent {
metaData.add("Content-Type", "text/html");
Content r = new Content(url, url, page.getBytes("UTF8"), "text/html",
- metaData, conf);
+ metaData, conf);
WritableTestUtils.testWritable(r);
assertEquals("text/html", r.getMetadata().get("Content-Type"));
@@ -60,52 +59,36 @@ public class TestContent {
Content c = null;
Metadata p = new Metadata();
- c = new Content("http://www.foo.com/",
- "http://www.foo.com/",
- "".getBytes("UTF8"),
- "text/html; charset=UTF-8", p, conf);
+ c = new Content("http://www.foo.com/", "http://www.foo.com/",
+ "".getBytes("UTF8"), "text/html; charset=UTF-8", p, conf);
assertEquals("text/html", c.getContentType());
- c = new Content("http://www.foo.com/foo.html",
- "http://www.foo.com/",
- "".getBytes("UTF8"),
- "", p, conf);
+ c = new Content("http://www.foo.com/foo.html", "http://www.foo.com/",
+ "".getBytes("UTF8"), "", p, conf);
assertEquals("text/html", c.getContentType());
- c = new Content("http://www.foo.com/foo.html",
- "http://www.foo.com/",
- "".getBytes("UTF8"),
- null, p, conf);
+ c = new Content("http://www.foo.com/foo.html", "http://www.foo.com/",
+ "".getBytes("UTF8"), null, p, conf);
assertEquals("text/html", c.getContentType());
- c = new Content("http://www.foo.com/",
- "http://www.foo.com/",
- "<html></html>".getBytes("UTF8"),
- "", p, conf);
+ c = new Content("http://www.foo.com/", "http://www.foo.com/",
+ "<html></html>".getBytes("UTF8"), "", p, conf);
assertEquals("text/html", c.getContentType());
- c = new Content("http://www.foo.com/foo.html",
- "http://www.foo.com/",
- "<html></html>".getBytes("UTF8"),
- "text/plain", p, conf);
+ c = new Content("http://www.foo.com/foo.html", "http://www.foo.com/",
+ "<html></html>".getBytes("UTF8"), "text/plain", p, conf);
assertEquals("text/html", c.getContentType());
- c = new Content("http://www.foo.com/foo.png",
- "http://www.foo.com/",
- "<html></html>".getBytes("UTF8"),
- "text/plain", p, conf);
+ c = new Content("http://www.foo.com/foo.png", "http://www.foo.com/",
+ "<html></html>".getBytes("UTF8"), "text/plain", p, conf);
assertEquals("text/html", c.getContentType());
- c = new Content("http://www.foo.com/",
- "http://www.foo.com/",
- "".getBytes("UTF8"),
- "", p, conf);
+ c = new Content("http://www.foo.com/", "http://www.foo.com/",
+ "".getBytes("UTF8"), "", p, conf);
assertEquals(MimeTypes.OCTET_STREAM, c.getContentType());
- c = new Content("http://www.foo.com/",
- "http://www.foo.com/",
- "".getBytes("UTF8"),
- null, p, conf);
+ c = new Content("http://www.foo.com/", "http://www.foo.com/",
+ "".getBytes("UTF8"), null, p, conf);
assertNotNull(c.getContentType());
}
Modified: nutch/branches/2.x/src/test/org/apache/nutch/protocol/TestProtocolFactory.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/protocol/TestProtocolFactory.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/protocol/TestProtocolFactory.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/protocol/TestProtocolFactory.java Fri Jan 9 06:34:33 2015
@@ -28,58 +28,59 @@ public class TestProtocolFactory {
Configuration conf;
ProtocolFactory factory;
-
+
@Before
public void setUp() throws Exception {
conf = NutchConfiguration.create();
conf.set("plugin.includes", ".*");
conf.set("http.agent.name", "test-bot");
- factory=new ProtocolFactory(conf);
+ factory = new ProtocolFactory(conf);
}
@Test
- public void testGetProtocol(){
+ public void testGetProtocol() {
- //non existing protocol
+ // non existing protocol
try {
factory.getProtocol("xyzxyz://somehost");
fail("Must throw ProtocolNotFound");
} catch (ProtocolNotFound e) {
- //all is ok
- } catch (Exception ex){
+ // all is ok
+ } catch (Exception ex) {
fail("Must not throw any other exception");
}
-
- Protocol httpProtocol=null;
-
- //existing protocol
+
+ Protocol httpProtocol = null;
+
+ // existing protocol
try {
- httpProtocol=factory.getProtocol("http://somehost");
+ httpProtocol = factory.getProtocol("http://somehost");
assertNotNull(httpProtocol);
- } catch (Exception ex){
+ } catch (Exception ex) {
fail("Must not throw any other exception");
}
- //cache key
- Object protocol = ObjectCache.get(conf).getObject(Protocol.X_POINT_ID + "http");
+ // cache key
+ Object protocol = ObjectCache.get(conf).getObject(
+ Protocol.X_POINT_ID + "http");
assertNotNull(protocol);
assertEquals(httpProtocol, protocol);
-
- //test same object instance
+
+ // test same object instance
try {
- assertTrue(httpProtocol==factory.getProtocol("http://somehost"));
+ assertTrue(httpProtocol == factory.getProtocol("http://somehost"));
} catch (ProtocolNotFound e) {
fail("Must not throw any exception");
}
}
-
+
@Test
- public void testContains(){
+ public void testContains() {
assertTrue(factory.contains("http", "http"));
assertTrue(factory.contains("http", "http,ftp"));
assertTrue(factory.contains("http", " http , ftp"));
assertTrue(factory.contains("smb", "ftp,smb,http"));
assertFalse(factory.contains("smb", "smbb"));
}
-
+
}
Modified: nutch/branches/2.x/src/test/org/apache/nutch/storage/TestGoraStorage.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/storage/TestGoraStorage.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/storage/TestGoraStorage.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/storage/TestGoraStorage.java Fri Jan 9 06:34:33 2015
@@ -58,7 +58,7 @@ public class TestGoraStorage extends Abs
public void tearDown() throws Exception {
super.tearDown();
}
-
+
/**
* Sequentially read and write pages to a store.
*
@@ -71,7 +71,7 @@ public class TestGoraStorage extends Abs
readWrite(id, webPageStore);
}
- private static void readWrite(String id, DataStore<String, WebPage> store)
+ private static void readWrite(String id, DataStore<String, WebPage> store)
throws IOException, Exception {
WebPage page = WebPage.newBuilder().build();
int max = 1000;
@@ -147,9 +147,9 @@ public class TestGoraStorage extends Abs
assertEquals(0, (int) result.get());
}
}
-
+
/**
- * Tests multiple processes reading and writing to the same store backend,
+ * Tests multiple processes reading and writing to the same store backend,
* this is to simulate a multi process Nutch environment (i.e. MapReduce).
*
* @throws Exception
@@ -159,23 +159,23 @@ public class TestGoraStorage extends Abs
public void testMultiProcess() throws Exception {
// create and start a hsql server, a stand-alone (memory backed) db
// (important: a stand-alone server should be used because simple
- // file based access i.e. jdbc:hsqldb:file is NOT process-safe.)
+ // file based access i.e. jdbc:hsqldb:file is NOT process-safe.)
Server server = new Server();
server.setDaemon(true);
server.setSilent(true); // disables LOTS of trace
final String className = getClass().getName();
String dbName = "test";
- server.setDatabasePath(0, "mem:"+dbName);
+ server.setDatabasePath(0, "mem:" + dbName);
server.setDatabaseName(0, dbName);
server.start();
-
- //create the store so that the tests can start right away
+
+ // create the store so that the tests can start right away
StorageUtils.createWebStore(conf, String.class, WebPage.class);
-
+
// create a fixed thread pool
int numThreads = 4;
ExecutorService pool = Executors.newFixedThreadPool(numThreads);
-
+
// spawn multiple processes, each thread spawns own process
Collection<Callable<Integer>> tasks = new ArrayList<Callable<Integer>>();
for (int i = 0; i < numThreads; i++) {
@@ -190,15 +190,16 @@ public class TestGoraStorage extends Abs
classpath = "./src/testprocess" + pathSeparator + classpath;
String path = System.getProperty("java.home") + separator + "bin"
+ separator + "java";
- ProcessBuilder processBuilder = new ProcessBuilder(path, "-cp",
+ ProcessBuilder processBuilder = new ProcessBuilder(path, "-cp",
classpath, className);
processBuilder.redirectErrorStream(true);
Process process = processBuilder.start();
InputStream in = process.getInputStream();
int exit = process.waitFor();
- //print the output of the process
- System.out.println("===Process stream for " + Thread.currentThread()
- + "\n" + IOUtils.toString(in) + "===End of process stream.");
+ // print the output of the process
+ System.out.println("===Process stream for "
+ + Thread.currentThread() + "\n" + IOUtils.toString(in)
+ + "===End of process stream.");
in.close();
// process should exit with zero code
return exit;
@@ -218,8 +219,8 @@ public class TestGoraStorage extends Abs
for (Future<Integer> result : results) {
assertEquals(0, (int) result.get());
}
-
- //stop db
+
+ // stop db
server.stop();
}
@@ -228,7 +229,8 @@ public class TestGoraStorage extends Abs
System.out.println("Starting!");
Configuration localConf = CrawlTestUtil.createConfiguration();
- localConf.set("storage.data.store.class", "org.apache.gora.memory.store.MemStore");
+ localConf.set("storage.data.store.class",
+ "org.apache.gora.memory.store.MemStore");
DataStore<String, WebPage> store = StorageUtils.createWebStore(localConf,
String.class, WebPage.class);
Modified: nutch/branches/2.x/src/test/org/apache/nutch/util/AbstractNutchTest.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/util/AbstractNutchTest.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/util/AbstractNutchTest.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/util/AbstractNutchTest.java Fri Jan 9 06:34:33 2015
@@ -37,7 +37,8 @@ public class AbstractNutchTest {
public void setUp() throws Exception {
conf = CrawlTestUtil.createConfiguration();
- conf.set("storage.data.store.class", "org.apache.gora.memory.store.MemStore");
+ conf.set("storage.data.store.class",
+ "org.apache.gora.memory.store.MemStore");
fs = FileSystem.get(conf);
webPageStore = StorageUtils.createWebStore(conf, String.class,
WebPage.class);
Modified: nutch/branches/2.x/src/test/org/apache/nutch/util/CrawlTestUtil.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/util/CrawlTestUtil.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/util/CrawlTestUtil.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/util/CrawlTestUtil.java Fri Jan 9 06:34:33 2015
@@ -42,7 +42,8 @@ import java.util.List;
public class CrawlTestUtil {
- private static final Logger LOG = LoggerFactory.getLogger(CrawlTestUtil.class);
+ private static final Logger LOG = LoggerFactory
+ .getLogger(CrawlTestUtil.class);
/**
* For now we need to manually construct our Configuration, because we need to
@@ -93,15 +94,16 @@ public class CrawlTestUtil {
out.flush();
out.close();
}
-
+
/**
* Read entries from a data store
- *
+ *
* @return list of matching {@link URLWebPage} objects
* @throws Exception
*/
- public static ArrayList<URLWebPage> readContents(DataStore<String,WebPage> store,
- Mark requiredMark, String... fields) throws Exception {
+ public static ArrayList<URLWebPage> readContents(
+ DataStore<String, WebPage> store, Mark requiredMark, String... fields)
+ throws Exception {
ArrayList<URLWebPage> l = new ArrayList<URLWebPage>();
Query<String, WebPage> query = store.newQuery();
@@ -121,7 +123,8 @@ public class CrawlTestUtil {
if (requiredMark != null && requiredMark.checkMark(page) == null)
continue;
- l.add(new URLWebPage(TableUtil.unreverseUrl(url), WebPage.newBuilder(page).build()));
+ l.add(new URLWebPage(TableUtil.unreverseUrl(url), WebPage.newBuilder(
+ page).build()));
} catch (Exception e) {
e.printStackTrace();
}
@@ -129,7 +132,6 @@ public class CrawlTestUtil {
return l;
}
-
/**
* Creates a new JettyServer with one static root context
*
@@ -145,7 +147,7 @@ public class CrawlTestUtil {
ResourceHandler handler = new ResourceHandler();
handler.setResourceBase(staticContent);
HandlerList handlers = new HandlerList();
- handlers.setHandlers(new Handler[]{handler, new DefaultHandler()});
+ handlers.setHandlers(new Handler[] { handler, new DefaultHandler() });
webServer.setHandler(handlers);
return webServer;
}
Modified: nutch/branches/2.x/src/test/org/apache/nutch/util/TestEncodingDetector.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/util/TestEncodingDetector.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/util/TestEncodingDetector.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/util/TestEncodingDetector.java Fri Jan 9 06:34:33 2015
@@ -45,7 +45,7 @@ public class TestEncodingDetector {
// first disable auto detection
conf.setInt(EncodingDetector.MIN_CONFIDENCE_KEY, -1);
- //Metadata metadata = new Metadata();
+ // Metadata metadata = new Metadata();
EncodingDetector detector;
// Content content;
String encoding;
@@ -65,8 +65,9 @@ public class TestEncodingDetector {
page.setBaseUrl(new Utf8("http://www.example.com/"));
page.setContentType(new Utf8("text/plain"));
page.setContent(ByteBuffer.wrap(contentInOctets));
- page.getHeaders().put(EncodingDetector.CONTENT_TYPE_UTF8, new Utf8("text/plain; charset=UTF-16"));
-
+ page.getHeaders().put(EncodingDetector.CONTENT_TYPE_UTF8,
+ new Utf8("text/plain; charset=UTF-16"));
+
detector = new EncodingDetector(conf);
detector.autoDetectClues(page, true);
encoding = detector.guessEncoding(page, "windows-1252");
@@ -76,7 +77,7 @@ public class TestEncodingDetector {
page.setBaseUrl(new Utf8("http://www.example.com/"));
page.setContentType(new Utf8("text/plain"));
page.setContent(ByteBuffer.wrap(contentInOctets));
-
+
detector = new EncodingDetector(conf);
detector.autoDetectClues(page, true);
detector.addClue("windows-1254", "sniffed");
@@ -89,8 +90,9 @@ public class TestEncodingDetector {
page.setBaseUrl(new Utf8("http://www.example.com/"));
page.setContentType(new Utf8("text/plain"));
page.setContent(ByteBuffer.wrap(contentInOctets));
- page.getMetadata().put(new Utf8(Response.CONTENT_TYPE), ByteBuffer.wrap("text/plain; charset=UTF-16".getBytes()));
-
+ page.getMetadata().put(new Utf8(Response.CONTENT_TYPE),
+ ByteBuffer.wrap("text/plain; charset=UTF-16".getBytes()));
+
detector = new EncodingDetector(conf);
detector.autoDetectClues(page, true);
detector.addClue("utf-32", "sniffed");
Modified: nutch/branches/2.x/src/test/org/apache/nutch/util/TestGZIPUtils.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/util/TestGZIPUtils.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/util/TestGZIPUtils.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/util/TestGZIPUtils.java Fri Jan 9 06:34:33 2015
@@ -26,222 +26,215 @@ import java.io.IOException;
public class TestGZIPUtils {
/* a short, highly compressable, string */
- String SHORT_TEST_STRING=
- "aaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbcccccccccccccccc";
+ String SHORT_TEST_STRING = "aaaaaaaaaaaaaaaabbbbbbbbbbbbbbbbbbbbbcccccccccccccccc";
/* a short, highly compressable, string */
- String LONGER_TEST_STRING=
- SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING
- + SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING
- + SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING
- + SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING;
+ String LONGER_TEST_STRING = SHORT_TEST_STRING + SHORT_TEST_STRING
+ + SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING
+ + SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING
+ + SHORT_TEST_STRING + SHORT_TEST_STRING + SHORT_TEST_STRING
+ + SHORT_TEST_STRING;
/* a snapshot of the nutch webpage */
- String WEBPAGE=
- "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n"
- + "<html>\n"
- + "<head>\n"
- + " <meta http-equiv=\"content-type\"\n"
- + " content=\"text/html; charset=ISO-8859-1\">\n"
- + " <title>Nutch</title>\n"
- + "</head>\n"
- + "<body>\n"
- + "<h1\n"
- + " style=\"font-family: helvetica,arial,sans-serif; text-align: center; color: rgb(255, 153, 0);\"><a\n"
- + " href=\"http://www.nutch.org/\"><font style=\"color: rgb(255, 153, 0);\">Nutch</font></a><br>\n"
- + "<small>an open source web-search engine</small></h1>\n"
- + "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\">\n"
- + "<table\n"
- + " style=\"width: 100%; text-align: left; margin-left: auto; margin-right: auto;\"\n"
- + " border=\"0\" cellspacing=\"0\" cellpadding=\"0\">\n"
- + " <tbody>\n"
- + " <tr>\n"
- + " <td style=\"vertical-align: top; text-align: center;\"><a\n"
- + " href=\"http://sourceforge.net/project/showfiles.php?group_id=59548\">Download</a><br>\n"
- + " </td>\n"
- + " <td style=\"vertical-align: top; text-align: center;\"><a\n"
- + " href=\"tutorial.html\">Tutorial</a><br>\n"
- + " </td>\n"
- + " <td style=\"vertical-align: top; text-align: center;\"><a\n"
- + " href=\"http://cvs.sourceforge.net/cgi-bin/viewcvs.cgi/nutch/nutch/\">CVS</a><br>\n"
- + " </td>\n"
- + " <td style=\"vertical-align: top; text-align: center;\"><a\n"
- + " href=\"api/index.html\">Javadoc</a><br>\n"
- + " </td>\n"
- + " <td style=\"vertical-align: top; text-align: center;\"><a\n"
- + " href=\"http://sourceforge.net/tracker/?atid=491356&group_id=59548&func=browse\">Bugs</a><br>\n"
- + " </td>\n"
- + " <td style=\"vertical-align: top; text-align: center;\"><a\n"
- + " href=\"http://sourceforge.net/mail/?group_id=59548\">Lists</a></td>\n"
- + " <td style=\"vertical-align: top; text-align: center;\"><a\n"
- + " href=\"policies.html\">Policies</a><br>\n"
- + " </td>\n"
- + " </tr>\n"
- + " </tbody>\n"
- + "</table>\n"
- + "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\">\n"
- + "<h2>Introduction</h2>\n"
- + "Nutch is a nascent effort to implement an open-source web search\n"
- + "engine. Web search is a basic requirement for internet navigation, yet\n"
- + "the number of web search engines is decreasing. Today's oligopoly could\n"
- + "soon be a monopoly, with a single company controlling nearly all web\n"
- + "search for its commercial gain. That would not be good for the\n"
- + "users of internet. Nutch aims to enable anyone to easily and\n"
- + "cost-effectively deploy a world-class web search engine.<br>\n"
- + "<br>\n"
- + "To succeed, the Nutch software must be able to:<br>\n"
- + "<ul>\n"
- + " <li> crawl several billion pages per month</li>\n"
- + " <li>maintain an index of these pages</li>\n"
- + " <li>search that index up to 1000 times per second</li>\n"
- + " <li>provide very high quality search results</li>\n"
- + " <li>operate at minimal cost</li>\n"
- + "</ul>\n"
- + "<h2>Status</h2>\n"
- + "Currently we're just a handful of developers working part-time to put\n"
- + "together a demo. The demo is coded entirely in Java. However\n"
- + "persistent data is written in well-documented formats so that modules\n"
- + "may eventually be re-written in other languages (e.g., Perl, C++) as the\n"
- + "project progresses.<br>\n"
- + "<br>\n"
- + "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\"> <a\n"
- + " href=\"http://sourceforge.net\"> </a>\n"
- + "<div style=\"text-align: center;\"><a href=\"http://sourceforge.net\"><img\n"
- + " src=\"http://sourceforge.net/sflogo.php?group_id=59548&type=1\"\n"
- + " style=\"border: 0px solid ; width: 88px; height: 31px;\"\n"
- + " alt=\"SourceForge.net Logo\" title=\"\"></a></div>\n"
- + "</body>\n"
- + "</html>\n";
+ String WEBPAGE = "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">\n"
+ + "<html>\n"
+ + "<head>\n"
+ + " <meta http-equiv=\"content-type\"\n"
+ + " content=\"text/html; charset=ISO-8859-1\">\n"
+ + " <title>Nutch</title>\n"
+ + "</head>\n"
+ + "<body>\n"
+ + "<h1\n"
+ + " style=\"font-family: helvetica,arial,sans-serif; text-align: center; color: rgb(255, 153, 0);\"><a\n"
+ + " href=\"http://www.nutch.org/\"><font style=\"color: rgb(255, 153, 0);\">Nutch</font></a><br>\n"
+ + "<small>an open source web-search engine</small></h1>\n"
+ + "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\">\n"
+ + "<table\n"
+ + " style=\"width: 100%; text-align: left; margin-left: auto; margin-right: auto;\"\n"
+ + " border=\"0\" cellspacing=\"0\" cellpadding=\"0\">\n"
+ + " <tbody>\n"
+ + " <tr>\n"
+ + " <td style=\"vertical-align: top; text-align: center;\"><a\n"
+ + " href=\"http://sourceforge.net/project/showfiles.php?group_id=59548\">Download</a><br>\n"
+ + " </td>\n"
+ + " <td style=\"vertical-align: top; text-align: center;\"><a\n"
+ + " href=\"tutorial.html\">Tutorial</a><br>\n"
+ + " </td>\n"
+ + " <td style=\"vertical-align: top; text-align: center;\"><a\n"
+ + " href=\"http://cvs.sourceforge.net/cgi-bin/viewcvs.cgi/nutch/nutch/\">CVS</a><br>\n"
+ + " </td>\n"
+ + " <td style=\"vertical-align: top; text-align: center;\"><a\n"
+ + " href=\"api/index.html\">Javadoc</a><br>\n"
+ + " </td>\n"
+ + " <td style=\"vertical-align: top; text-align: center;\"><a\n"
+ + " href=\"http://sourceforge.net/tracker/?atid=491356&group_id=59548&func=browse\">Bugs</a><br>\n"
+ + " </td>\n"
+ + " <td style=\"vertical-align: top; text-align: center;\"><a\n"
+ + " href=\"http://sourceforge.net/mail/?group_id=59548\">Lists</a></td>\n"
+ + " <td style=\"vertical-align: top; text-align: center;\"><a\n"
+ + " href=\"policies.html\">Policies</a><br>\n"
+ + " </td>\n"
+ + " </tr>\n"
+ + " </tbody>\n"
+ + "</table>\n"
+ + "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\">\n"
+ + "<h2>Introduction</h2>\n"
+ + "Nutch is a nascent effort to implement an open-source web search\n"
+ + "engine. Web search is a basic requirement for internet navigation, yet\n"
+ + "the number of web search engines is decreasing. Today's oligopoly could\n"
+ + "soon be a monopoly, with a single company controlling nearly all web\n"
+ + "search for its commercial gain. That would not be good for the\n"
+ + "users of internet. Nutch aims to enable anyone to easily and\n"
+ + "cost-effectively deploy a world-class web search engine.<br>\n"
+ + "<br>\n"
+ + "To succeed, the Nutch software must be able to:<br>\n"
+ + "<ul>\n"
+ + " <li> crawl several billion pages per month</li>\n"
+ + " <li>maintain an index of these pages</li>\n"
+ + " <li>search that index up to 1000 times per second</li>\n"
+ + " <li>provide very high quality search results</li>\n"
+ + " <li>operate at minimal cost</li>\n"
+ + "</ul>\n"
+ + "<h2>Status</h2>\n"
+ + "Currently we're just a handful of developers working part-time to put\n"
+ + "together a demo. The demo is coded entirely in Java. However\n"
+ + "persistent data is written in well-documented formats so that modules\n"
+ + "may eventually be re-written in other languages (e.g., Perl, C++) as the\n"
+ + "project progresses.<br>\n"
+ + "<br>\n"
+ + "<hr style=\"width: 100%; height: 1px;\" noshade=\"noshade\"> <a\n"
+ + " href=\"http://sourceforge.net\"> </a>\n"
+ + "<div style=\"text-align: center;\"><a href=\"http://sourceforge.net\"><img\n"
+ + " src=\"http://sourceforge.net/sflogo.php?group_id=59548&type=1\"\n"
+ + " style=\"border: 0px solid ; width: 88px; height: 31px;\"\n"
+ + " alt=\"SourceForge.net Logo\" title=\"\"></a></div>\n"
+ + "</body>\n"
+ + "</html>\n";
// tests
@Test
public void testZipUnzip() {
- byte[] testBytes= SHORT_TEST_STRING.getBytes();
+ byte[] testBytes = SHORT_TEST_STRING.getBytes();
testZipUnzip(testBytes);
- testBytes= LONGER_TEST_STRING.getBytes();
+ testBytes = LONGER_TEST_STRING.getBytes();
testZipUnzip(testBytes);
- testBytes= WEBPAGE.getBytes();
+ testBytes = WEBPAGE.getBytes();
testZipUnzip(testBytes);
}
@Test
public void testZipUnzipBestEffort() {
- byte[] testBytes= SHORT_TEST_STRING.getBytes();
+ byte[] testBytes = SHORT_TEST_STRING.getBytes();
testZipUnzipBestEffort(testBytes);
- testBytes= LONGER_TEST_STRING.getBytes();
+ testBytes = LONGER_TEST_STRING.getBytes();
testZipUnzipBestEffort(testBytes);
- testBytes= WEBPAGE.getBytes();
+ testBytes = WEBPAGE.getBytes();
testZipUnzipBestEffort(testBytes);
}
-
+
@Test
public void testTruncation() {
- byte[] testBytes= SHORT_TEST_STRING.getBytes();
+ byte[] testBytes = SHORT_TEST_STRING.getBytes();
testTruncation(testBytes);
- testBytes= LONGER_TEST_STRING.getBytes();
+ testBytes = LONGER_TEST_STRING.getBytes();
testTruncation(testBytes);
- testBytes= WEBPAGE.getBytes();
+ testBytes = WEBPAGE.getBytes();
testTruncation(testBytes);
}
@Test
public void testLimit() {
- byte[] testBytes= SHORT_TEST_STRING.getBytes();
+ byte[] testBytes = SHORT_TEST_STRING.getBytes();
testLimit(testBytes);
- testBytes= LONGER_TEST_STRING.getBytes();
+ testBytes = LONGER_TEST_STRING.getBytes();
testLimit(testBytes);
- testBytes= WEBPAGE.getBytes();
+ testBytes = WEBPAGE.getBytes();
testLimit(testBytes);
}
// helpers
public void testZipUnzip(byte[] origBytes) {
- byte[] compressedBytes= GZIPUtils.zip(origBytes);
+ byte[] compressedBytes = GZIPUtils.zip(origBytes);
assertTrue("compressed array is not smaller!",
- compressedBytes.length < origBytes.length);
+ compressedBytes.length < origBytes.length);
- byte[] uncompressedBytes= null;
+ byte[] uncompressedBytes = null;
try {
- uncompressedBytes= GZIPUtils.unzip(compressedBytes);
+ uncompressedBytes = GZIPUtils.unzip(compressedBytes);
} catch (IOException e) {
e.printStackTrace();
- assertTrue("caught exception '" + e + "' during unzip()",
- false);
+ assertTrue("caught exception '" + e + "' during unzip()", false);
}
- assertTrue("uncompressedBytes is wrong size",
- uncompressedBytes.length == origBytes.length);
+ assertTrue("uncompressedBytes is wrong size",
+ uncompressedBytes.length == origBytes.length);
- for (int i= 0; i < origBytes.length; i++)
+ for (int i = 0; i < origBytes.length; i++)
if (origBytes[i] != uncompressedBytes[i])
- assertTrue("uncompressedBytes does not match origBytes", false);
+ assertTrue("uncompressedBytes does not match origBytes", false);
}
public void testZipUnzipBestEffort(byte[] origBytes) {
- byte[] compressedBytes= GZIPUtils.zip(origBytes);
+ byte[] compressedBytes = GZIPUtils.zip(origBytes);
assertTrue("compressed array is not smaller!",
- compressedBytes.length < origBytes.length);
+ compressedBytes.length < origBytes.length);
- byte[] uncompressedBytes= GZIPUtils.unzipBestEffort(compressedBytes);
- assertTrue("uncompressedBytes is wrong size",
- uncompressedBytes.length == origBytes.length);
+ byte[] uncompressedBytes = GZIPUtils.unzipBestEffort(compressedBytes);
+ assertTrue("uncompressedBytes is wrong size",
+ uncompressedBytes.length == origBytes.length);
- for (int i= 0; i < origBytes.length; i++)
+ for (int i = 0; i < origBytes.length; i++)
if (origBytes[i] != uncompressedBytes[i])
- assertTrue("uncompressedBytes does not match origBytes", false);
+ assertTrue("uncompressedBytes does not match origBytes", false);
}
public void testTruncation(byte[] origBytes) {
- byte[] compressedBytes= GZIPUtils.zip(origBytes);
+ byte[] compressedBytes = GZIPUtils.zip(origBytes);
System.out.println("original data has len " + origBytes.length);
- System.out.println("compressed data has len "
- + compressedBytes.length);
+ System.out.println("compressed data has len " + compressedBytes.length);
- for (int i= compressedBytes.length; i >= 0; i--) {
+ for (int i = compressedBytes.length; i >= 0; i--) {
- byte[] truncCompressed= new byte[i];
+ byte[] truncCompressed = new byte[i];
- for (int j= 0; j < i; j++)
- truncCompressed[j]= compressedBytes[j];
+ for (int j = 0; j < i; j++)
+ truncCompressed[j] = compressedBytes[j];
- byte[] trunc= GZIPUtils.unzipBestEffort(truncCompressed);
+ byte[] trunc = GZIPUtils.unzipBestEffort(truncCompressed);
if (trunc == null) {
- System.out.println("truncated to len "
- + i + ", trunc is null");
+ System.out.println("truncated to len " + i + ", trunc is null");
} else {
- System.out.println("truncated to len "
- + i + ", trunc.length= "
- + trunc.length);
-
- for (int j= 0; j < trunc.length; j++)
- if (trunc[j] != origBytes[j])
- assertTrue("truncated/uncompressed array differs at pos "
- + j + " (compressed data had been truncated to len "
- + i + ")", false);
+ System.out.println("truncated to len " + i + ", trunc.length= "
+ + trunc.length);
+
+ for (int j = 0; j < trunc.length; j++)
+ if (trunc[j] != origBytes[j])
+ assertTrue("truncated/uncompressed array differs at pos " + j
+ + " (compressed data had been truncated to len " + i + ")",
+ false);
}
}
}
public void testLimit(byte[] origBytes) {
- byte[] compressedBytes= GZIPUtils.zip(origBytes);
+ byte[] compressedBytes = GZIPUtils.zip(origBytes);
assertTrue("compressed array is not smaller!",
- compressedBytes.length < origBytes.length);
+ compressedBytes.length < origBytes.length);
- for (int i= 0; i < origBytes.length; i++) {
+ for (int i = 0; i < origBytes.length; i++) {
- byte[] uncompressedBytes=
- GZIPUtils.unzipBestEffort(compressedBytes, i);
+ byte[] uncompressedBytes = GZIPUtils.unzipBestEffort(compressedBytes, i);
- assertTrue("uncompressedBytes is wrong size",
- uncompressedBytes.length == i);
+ assertTrue("uncompressedBytes is wrong size",
+ uncompressedBytes.length == i);
- for (int j= 0; j < i; j++)
+ for (int j = 0; j < i; j++)
if (origBytes[j] != uncompressedBytes[j])
assertTrue("uncompressedBytes does not match origBytes", false);
}
Modified: nutch/branches/2.x/src/test/org/apache/nutch/util/TestMimeUtil.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/util/TestMimeUtil.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/util/TestMimeUtil.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/util/TestMimeUtil.java Fri Jan 9 06:34:33 2015
@@ -36,7 +36,8 @@ public class TestMimeUtil extends TestCa
private File sampleDir = new File(System.getProperty("test.build.data", "."),
"test-mime-util");
- /** test data, every element on "test page":
+ /**
+ * test data, every element on "test page":
* <ol>
* <li>MIME type</li>
* <li>file name (last URL path element)</li>
@@ -67,15 +68,11 @@ public class TestMimeUtil extends TestCa
"<?xml version=\"1.0\"?>\n<html xmlns=\"http://www.w3.org/1999/xhtml\">"
+ "<html>\n<head>\n"
+ "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\" />"
- + "</head>\n<body>Hello, World!</body></html>" }
- };
+ + "</head>\n<body>Hello, World!</body></html>" } };
- public static String[][] binaryFiles = {
- {
+ public static String[][] binaryFiles = { {
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
- "test.xlsx",
- "" }
- };
+ "test.xlsx", "" } };
private String getMimeType(String url, File file, String contentType,
boolean useMagic) throws IOException {
@@ -121,8 +118,8 @@ public class TestMimeUtil extends TestCa
public void testBinaryFiles() throws IOException {
for (String[] testPage : binaryFiles) {
File dataFile = new File(sampleDir, testPage[1]);
- String mimeType = getMimeType(urlPrefix + testPage[1],
- dataFile, testPage[2], false);
+ String mimeType = getMimeType(urlPrefix + testPage[1], dataFile,
+ testPage[2], false);
assertEquals("", testPage[0], mimeType);
}
}
Modified: nutch/branches/2.x/src/test/org/apache/nutch/util/TestNodeWalker.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/util/TestNodeWalker.java?rev=1650447&r1=1650446&r2=1650447&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/util/TestNodeWalker.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/util/TestNodeWalker.java Fri Jan 9 06:34:33 2015
@@ -31,41 +31,40 @@ import org.xml.sax.InputSource;
public class TestNodeWalker {
/* a snapshot of the nutch webpage */
- private final static String WEBPAGE=
- "<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\" xml:lang=\"en\"><head><title>Nutch</title></head>"
- + "<body>"
- + "<ul>"
- + "<li>crawl several billion pages per month</li>"
- + "<li>maintain an index of these pages</li>"
- + "<li>search that index up to 1000 times per second</li>"
- + "<li>provide very high quality search results</li>"
- + "<li>operate at minimal cost</li>"
- + "</ul>"
- + "</body>"
- + "</html>";
+ private final static String WEBPAGE = "<html xmlns=\"http://www.w3.org/1999/xhtml\" lang=\"en\" xml:lang=\"en\"><head><title>Nutch</title></head>"
+ + "<body>"
+ + "<ul>"
+ + "<li>crawl several billion pages per month</li>"
+ + "<li>maintain an index of these pages</li>"
+ + "<li>search that index up to 1000 times per second</li>"
+ + "<li>provide very high quality search results</li>"
+ + "<li>operate at minimal cost</li>" + "</ul>" + "</body>" + "</html>";
private final static String[] ULCONTENT = new String[4];
-
+
@Before
- public void setUp() throws Exception{
- ULCONTENT[0]="crawl several billion pages per month" ;
- ULCONTENT[1]="maintain an index of these pages" ;
- ULCONTENT[2]="search that index up to 1000 times per second" ;
- ULCONTENT[3]="operate at minimal cost" ;
+ public void setUp() throws Exception {
+ ULCONTENT[0] = "crawl several billion pages per month";
+ ULCONTENT[1] = "maintain an index of these pages";
+ ULCONTENT[2] = "search that index up to 1000 times per second";
+ ULCONTENT[3] = "operate at minimal cost";
}
@Test
public void testSkipChildren() {
- DOMParser parser= new DOMParser();
-
+ DOMParser parser = new DOMParser();
+
try {
parser.setFeature("http://xml.org/sax/features/validation", false);
- parser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
- parser.parse(new InputSource(new ByteArrayInputStream(WEBPAGE.getBytes())));
+ parser.setFeature(
+ "http://apache.org/xml/features/nonvalidating/load-external-dtd",
+ false);
+ parser
+ .parse(new InputSource(new ByteArrayInputStream(WEBPAGE.getBytes())));
} catch (Exception e) {
e.printStackTrace();
}
-
+
StringBuffer sb = new StringBuffer();
NodeWalker walker = new NodeWalker(parser.getDocument());
while (walker.hasNext()) {
@@ -77,30 +76,33 @@ public class TestNodeWalker {
sb.append(text);
}
}
- assertTrue("UL Content can NOT be found in the node", findSomeUlContent(sb.toString()));
-
- StringBuffer sbSkip = new StringBuffer();
- NodeWalker walkerSkip = new NodeWalker(parser.getDocument());
- while (walkerSkip.hasNext()) {
- Node currentNode = walkerSkip.nextNode();
- String nodeName = currentNode.getNodeName();
- short nodeType = currentNode.getNodeType();
- if ("ul".equalsIgnoreCase(nodeName)) {
- walkerSkip.skipChildren();
- }
- if (nodeType == Node.TEXT_NODE) {
- String text = currentNode.getNodeValue();
- text = text.replaceAll("\\s+", " ");
- sbSkip.append(text);
- }
- }
- assertFalse("UL Content can be found in the node", findSomeUlContent(sbSkip.toString()));
+ assertTrue("UL Content can NOT be found in the node",
+ findSomeUlContent(sb.toString()));
+
+ StringBuffer sbSkip = new StringBuffer();
+ NodeWalker walkerSkip = new NodeWalker(parser.getDocument());
+ while (walkerSkip.hasNext()) {
+ Node currentNode = walkerSkip.nextNode();
+ String nodeName = currentNode.getNodeName();
+ short nodeType = currentNode.getNodeType();
+ if ("ul".equalsIgnoreCase(nodeName)) {
+ walkerSkip.skipChildren();
+ }
+ if (nodeType == Node.TEXT_NODE) {
+ String text = currentNode.getNodeValue();
+ text = text.replaceAll("\\s+", " ");
+ sbSkip.append(text);
+ }
+ }
+ assertFalse("UL Content can be found in the node",
+ findSomeUlContent(sbSkip.toString()));
}
-
+
public boolean findSomeUlContent(String str) {
- for(int i=0; i<ULCONTENT.length ; i++){
- if(str.contains(ULCONTENT[i])) return true;
- }
+ for (int i = 0; i < ULCONTENT.length; i++) {
+ if (str.contains(ULCONTENT[i]))
+ return true;
+ }
return false;
}
}