You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2014/03/29 01:54:41 UTC

svn commit: r1582928 [1/4] - in /nutch/trunk: ./ ivy/ src/plugin/creativecommons/src/test/org/creativecommons/nutch/ src/plugin/feed/src/test/org/apache/nutch/parse/feed/ src/plugin/index-anchor/src/test/org/apache/nutch/indexer/anchor/ src/plugin/inde...

Author: lewismc
Date: Sat Mar 29 00:54:40 2014
New Revision: 1582928

URL: http://svn.apache.org/r1582928
Log:
NUTCH-1737 Upgrade to recent JUnit 4.x

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/build.xml
    nutch/trunk/ivy/ivy.xml
    nutch/trunk/pom.xml
    nutch/trunk/src/plugin/creativecommons/src/test/org/creativecommons/nutch/TestCCParseFilter.java
    nutch/trunk/src/plugin/feed/src/test/org/apache/nutch/parse/feed/TestFeedParser.java
    nutch/trunk/src/plugin/index-anchor/src/test/org/apache/nutch/indexer/anchor/TestAnchorIndexingFilter.java
    nutch/trunk/src/plugin/index-basic/src/test/org/apache/nutch/indexer/basic/TestBasicIndexingFilter.java
    nutch/trunk/src/plugin/index-more/src/test/org/apache/nutch/indexer/more/TestMoreIndexingFilter.java
    nutch/trunk/src/plugin/index-static/src/test/org/apache/nutch/indexer/staticfield/TestStaticFieldIndexerTest.java
    nutch/trunk/src/plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/TestHTMLLanguageParser.java
    nutch/trunk/src/plugin/lib-http/src/test/org/apache/nutch/protocol/http/api/TestRobotRulesParser.java
    nutch/trunk/src/plugin/lib-regex-filter/src/test/org/apache/nutch/urlfilter/api/RegexURLFilterBaseTest.java
    nutch/trunk/src/plugin/parse-ext/src/test/org/apache/nutch/parse/ext/TestExtParser.java
    nutch/trunk/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestDOMContentUtils.java
    nutch/trunk/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestHtmlParser.java
    nutch/trunk/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestRobotsMetaProcessor.java
    nutch/trunk/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/html/TestMetatagParser.java
    nutch/trunk/src/plugin/parse-swf/src/test/org/apache/nutch/parse/swf/TestSWFParser.java
    nutch/trunk/src/plugin/parse-tika/src/test/org/apache/nutch/tika/TestDOMContentUtils.java
    nutch/trunk/src/plugin/parse-tika/src/test/org/apache/nutch/tika/TestFeedParser.java
    nutch/trunk/src/plugin/parse-tika/src/test/org/apache/nutch/tika/TestImageMetadata.java
    nutch/trunk/src/plugin/parse-tika/src/test/org/apache/nutch/tika/TestMSWordParser.java
    nutch/trunk/src/plugin/parse-tika/src/test/org/apache/nutch/tika/TestOOParser.java
    nutch/trunk/src/plugin/parse-tika/src/test/org/apache/nutch/tika/TestPdfParser.java
    nutch/trunk/src/plugin/parse-tika/src/test/org/apache/nutch/tika/TestRTFParser.java
    nutch/trunk/src/plugin/parse-tika/src/test/org/apache/nutch/tika/TestRobotsMetaProcessor.java
    nutch/trunk/src/plugin/parse-zip/src/test/org/apache/nutch/parse/zip/TestZipParser.java
    nutch/trunk/src/plugin/protocol-file/src/test/org/apache/nutch/protocol/file/TestProtocolFile.java
    nutch/trunk/src/plugin/protocol-httpclient/src/test/org/apache/nutch/protocol/httpclient/TestProtocolHttpClient.java
    nutch/trunk/src/plugin/subcollection/src/test/org/apache/nutch/collection/TestSubcollection.java
    nutch/trunk/src/plugin/urlfilter-automaton/src/test/org/apache/nutch/urlfilter/automaton/TestAutomatonURLFilter.java
    nutch/trunk/src/plugin/urlfilter-domain/src/test/org/apache/nutch/urlfilter/domain/TestDomainURLFilter.java
    nutch/trunk/src/plugin/urlfilter-domainblacklist/src/test/org/apache/nutch/urlfilter/domainblacklist/TestDomainBlacklistURLFilter.java
    nutch/trunk/src/plugin/urlfilter-regex/src/test/org/apache/nutch/urlfilter/regex/TestRegexURLFilter.java
    nutch/trunk/src/plugin/urlfilter-suffix/src/test/org/apache/nutch/urlfilter/suffix/TestSuffixURLFilter.java
    nutch/trunk/src/plugin/urlfilter-validator/src/test/org/apache/nutch/urlfilter/validator/TestUrlValidator.java
    nutch/trunk/src/plugin/urlnormalizer-basic/src/test/org/apache/nutch/net/urlnormalizer/basic/TestBasicURLNormalizer.java
    nutch/trunk/src/plugin/urlnormalizer-host/src/test/org/apache/nutch/net/urlnormalizer/host/TestHostURLNormalizer.java
    nutch/trunk/src/plugin/urlnormalizer-pass/src/test/org/apache/nutch/net/urlnormalizer/pass/TestPassURLNormalizer.java
    nutch/trunk/src/plugin/urlnormalizer-regex/src/test/org/apache/nutch/net/urlnormalizer/regex/TestRegexURLNormalizer.java
    nutch/trunk/src/test/org/apache/nutch/crawl/TestCrawlDbFilter.java
    nutch/trunk/src/test/org/apache/nutch/crawl/TestCrawlDbMerger.java
    nutch/trunk/src/test/org/apache/nutch/crawl/TestGenerator.java
    nutch/trunk/src/test/org/apache/nutch/crawl/TestInjector.java
    nutch/trunk/src/test/org/apache/nutch/crawl/TestLinkDbMerger.java
    nutch/trunk/src/test/org/apache/nutch/crawl/TestSignatureFactory.java
    nutch/trunk/src/test/org/apache/nutch/fetcher/TestFetcher.java
    nutch/trunk/src/test/org/apache/nutch/indexer/TestIndexingFilters.java
    nutch/trunk/src/test/org/apache/nutch/metadata/TestMetadata.java
    nutch/trunk/src/test/org/apache/nutch/metadata/TestSpellCheckedMetadata.java
    nutch/trunk/src/test/org/apache/nutch/net/TestURLFilters.java
    nutch/trunk/src/test/org/apache/nutch/net/TestURLNormalizers.java
    nutch/trunk/src/test/org/apache/nutch/parse/TestOutlinkExtractor.java
    nutch/trunk/src/test/org/apache/nutch/parse/TestParseData.java
    nutch/trunk/src/test/org/apache/nutch/parse/TestParseText.java
    nutch/trunk/src/test/org/apache/nutch/parse/TestParserFactory.java
    nutch/trunk/src/test/org/apache/nutch/plugin/TestPluginSystem.java
    nutch/trunk/src/test/org/apache/nutch/protocol/TestContent.java
    nutch/trunk/src/test/org/apache/nutch/protocol/TestProtocolFactory.java
    nutch/trunk/src/test/org/apache/nutch/segment/TestSegmentMerger.java
    nutch/trunk/src/test/org/apache/nutch/segment/TestSegmentMergerCrawlDatums.java
    nutch/trunk/src/test/org/apache/nutch/util/TestEncodingDetector.java
    nutch/trunk/src/test/org/apache/nutch/util/TestGZIPUtils.java
    nutch/trunk/src/test/org/apache/nutch/util/TestNodeWalker.java
    nutch/trunk/src/test/org/apache/nutch/util/TestPrefixStringMatcher.java
    nutch/trunk/src/test/org/apache/nutch/util/TestStringUtil.java
    nutch/trunk/src/test/org/apache/nutch/util/TestSuffixStringMatcher.java
    nutch/trunk/src/test/org/apache/nutch/util/TestURLUtil.java
    nutch/trunk/src/test/org/apache/nutch/util/WritableTestUtils.java

Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1582928&r1=1582927&r2=1582928&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Sat Mar 29 00:54:40 2014
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Nutch Current Development
 
+* NUTCH-1737 Upgrade to recent JUnit 4.x (lewismc)
+
 * NUTCH-1733 parse-html to support HTML5 charset definitions (snagel)
 
 * NUTCH-1671 indexchecker to add digest field (snagel, lufeng)

Modified: nutch/trunk/build.xml
URL: http://svn.apache.org/viewvc/nutch/trunk/build.xml?rev=1582928&r1=1582927&r2=1582928&view=diff
==============================================================================
--- nutch/trunk/build.xml (original)
+++ nutch/trunk/build.xml Sat Mar 29 00:54:40 2014
@@ -924,6 +924,8 @@
         <source path="${basedir}/src/plugin/index-basic/src/java/" />
         <source path="${basedir}/src/plugin/index-basic/src/test/" />
         <source path="${basedir}/src/plugin/indexer-solr/src/java/" />
+        <source path="${basedir}/src/plugin/indexer-elastic/src/java/" />
+        <source path="${basedir}/src/plugin/indexer-dummy/src/java/" />
         <source path="${basedir}/src/plugin/index-metadata/src/java/" />
         <source path="${basedir}/src/plugin/index-more/src/java/" />
         <source path="${basedir}/src/plugin/index-more/src/test/" />
@@ -984,6 +986,8 @@
         <source path="${basedir}/src/plugin/urlnormalizer-pass/src/test/" />
         <source path="${basedir}/src/plugin/urlnormalizer-regex/src/java/" />
         <source path="${basedir}/src/plugin/urlnormalizer-regex/src/test/" />
+        <source path="${basedir}/src/plugin/urlnormalizer-querystring/src/java/" />
+        <source path="${basedir}/src/plugin/urlnormalizer-querystring/src/test/" />
 
         <output path="${basedir}/build/classes" />
       </classpath>

Modified: nutch/trunk/ivy/ivy.xml
URL: http://svn.apache.org/viewvc/nutch/trunk/ivy/ivy.xml?rev=1582928&r1=1582927&r2=1582928&view=diff
==============================================================================
--- nutch/trunk/ivy/ivy.xml (original)
+++ nutch/trunk/ivy/ivy.xml Sat Mar 29 00:54:40 2014
@@ -80,7 +80,7 @@
 		<!--Configuration: test -->
 
 		<!--artifacts needed for testing -->
-		<dependency org="junit" name="junit" rev="3.8.1" conf="*->default" />
+		<dependency org="junit" name="junit" rev="4.11" conf="*->default" />
 		<dependency org="org.apache.hadoop" name="hadoop-test" rev="1.2.0"
 			conf="test->default" />
 

Modified: nutch/trunk/pom.xml
URL: http://svn.apache.org/viewvc/nutch/trunk/pom.xml?rev=1582928&r1=1582927&r2=1582928&view=diff
==============================================================================
--- nutch/trunk/pom.xml (original)
+++ nutch/trunk/pom.xml Sat Mar 29 00:54:40 2014
@@ -221,7 +221,7 @@
                 <dependency>
                         <groupId>junit</groupId>
                         <artifactId>junit</artifactId>
-                        <version>3.8.1</version>
+                        <version>4.11</version>
                         <optional>true</optional>
                 </dependency>
                 <dependency>

Modified: nutch/trunk/src/plugin/creativecommons/src/test/org/creativecommons/nutch/TestCCParseFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/creativecommons/src/test/org/creativecommons/nutch/TestCCParseFilter.java?rev=1582928&r1=1582927&r2=1582928&view=diff
==============================================================================
--- nutch/trunk/src/plugin/creativecommons/src/test/org/creativecommons/nutch/TestCCParseFilter.java (original)
+++ nutch/trunk/src/plugin/creativecommons/src/test/org/creativecommons/nutch/TestCCParseFilter.java Sat Mar 29 00:54:40 2014
@@ -23,18 +23,17 @@ import org.apache.nutch.parse.ParseUtil;
 import org.apache.nutch.protocol.Content;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.nutch.util.NutchConfiguration;
+import org.junit.Assert;
+import org.junit.Test;
 
-import java.util.Properties;
 import java.io.*;
-import java.net.URL;
 
-import junit.framework.TestCase;
-
-public class TestCCParseFilter extends TestCase {
+public class TestCCParseFilter {
 
   private static final File testDir =
     new File(System.getProperty("test.input"));
 
+  @Test
   public void testPages() throws Exception {
     pageTest(new File(testDir, "anchor.html"), "http://foo.com/",
              "http://creativecommons.org/licenses/by-nc-sa/1.0", "a", null);
@@ -69,9 +68,9 @@ public class TestCCParseFilter extends T
     Parse parse =  new ParseUtil(conf).parse(content).get(content.getUrl());
     
     Metadata metadata = parse.getData().getParseMeta();
-    assertEquals(license, metadata.get("License-Url"));
-    assertEquals(location, metadata.get("License-Location"));
-    assertEquals(type, metadata.get("Work-Type"));
+    Assert.assertEquals(license, metadata.get("License-Url"));
+    Assert.assertEquals(location, metadata.get("License-Location"));
+    Assert.assertEquals(type, metadata.get("Work-Type"));
   }
 }
 

Modified: nutch/trunk/src/plugin/feed/src/test/org/apache/nutch/parse/feed/TestFeedParser.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/feed/src/test/org/apache/nutch/parse/feed/TestFeedParser.java?rev=1582928&r1=1582927&r2=1582928&view=diff
==============================================================================
--- nutch/trunk/src/plugin/feed/src/test/org/apache/nutch/parse/feed/TestFeedParser.java (original)
+++ nutch/trunk/src/plugin/feed/src/test/org/apache/nutch/parse/feed/TestFeedParser.java Sat Mar 29 00:54:40 2014
@@ -21,6 +21,8 @@ package org.apache.nutch.parse.feed;
 import java.util.Iterator;
 import java.util.Map;
 
+import org.junit.Assert;
+import org.junit.Test;
 // APACHE imports
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -37,9 +39,6 @@ import org.apache.nutch.protocol.Protoco
 import org.apache.nutch.protocol.ProtocolNotFound;
 import org.apache.nutch.util.NutchConfiguration;
 
-// Junit imports
-import junit.framework.TestCase;
-
 /**
  * 
  * @author mattmann
@@ -47,7 +46,7 @@ import junit.framework.TestCase;
  * Test Suite for the {@link FeedParser}.
  * 
  */
-public class TestFeedParser extends TestCase {
+public class TestFeedParser {
 
   private String fileSeparator = System.getProperty("file.separator");
 
@@ -63,16 +62,6 @@ public class TestFeedParser extends Test
       .getName());
 
   /**
-   * Default Constructor.
-   * 
-   * @param name
-   *          The name of this {@link TestCase}.
-   */
-  public TestFeedParser(String name) {
-    super(name);
-  }
-
-  /**
    * Calls the {@link FeedParser} on a sample RSS file and checks that there are
    * 3 {@link ParseResult} entries including the below 2 links:
    * <ul>
@@ -87,6 +76,7 @@ public class TestFeedParser extends Test
    * @throws ParseException
    *           If the {@link Parser}Layer cannot be loaded.
    */
+  @Test
   public void testParseFetchChannel() throws ProtocolNotFound, ParseException {
     String urlString;
     Protocol protocol;
@@ -104,7 +94,7 @@ public class TestFeedParser extends Test
 
       parseResult = new ParseUtil(conf).parseByExtensionId("feed", content);
 
-      assertEquals(3, parseResult.size());
+      Assert.assertEquals(3, parseResult.size());
 
       boolean hasLink1 = false, hasLink2 = false, hasLink3=false;
 
@@ -121,12 +111,12 @@ public class TestFeedParser extends Test
           hasLink3 = true;
         }
 
-        assertNotNull(entry.getValue());
-        assertNotNull(entry.getValue().getData());
+        Assert.assertNotNull(entry.getValue());
+        Assert.assertNotNull(entry.getValue().getData());
       }
 
       if (!hasLink1 || !hasLink2 || !hasLink3) {
-        fail("Outlinks read from sample rss file are not correct!");
+        Assert.fail("Outlinks read from sample rss file are not correct!");
       }
     }
 

Modified: nutch/trunk/src/plugin/index-anchor/src/test/org/apache/nutch/indexer/anchor/TestAnchorIndexingFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/index-anchor/src/test/org/apache/nutch/indexer/anchor/TestAnchorIndexingFilter.java?rev=1582928&r1=1582927&r2=1582928&view=diff
==============================================================================
--- nutch/trunk/src/plugin/index-anchor/src/test/org/apache/nutch/indexer/anchor/TestAnchorIndexingFilter.java (original)
+++ nutch/trunk/src/plugin/index-anchor/src/test/org/apache/nutch/indexer/anchor/TestAnchorIndexingFilter.java Sat Mar 29 00:54:40 2014
@@ -16,8 +16,6 @@
  */
 package org.apache.nutch.indexer.anchor;
 
-import junit.framework.TestCase;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.Text;
 import org.apache.nutch.crawl.CrawlDatum;
@@ -27,6 +25,8 @@ import org.apache.nutch.indexer.NutchDoc
 import org.apache.nutch.parse.ParseData;
 import org.apache.nutch.parse.ParseImpl;
 import org.apache.nutch.util.NutchConfiguration;
+import org.junit.Assert;
+import org.junit.Test;
 
 /**
  * JUnit test case which tests
@@ -36,14 +36,15 @@ import org.apache.nutch.util.NutchConfig
  * @author lewismc
  *
  */
-public class TestAnchorIndexingFilter extends TestCase {
+public class TestAnchorIndexingFilter {
 
+  @Test
   public void testDeduplicateAnchor() throws Exception {
     Configuration conf = NutchConfiguration.create();
     conf.setBoolean("anchorIndexingFilter.deduplicate", true);
     AnchorIndexingFilter filter = new AnchorIndexingFilter();
     filter.setConf(conf);
-    assertNotNull(filter);
+    Assert.assertNotNull(filter);
     NutchDocument doc = new NutchDocument();
     ParseImpl parse = new ParseImpl("foo bar", new ParseData());
     Inlinks inlinks = new Inlinks();
@@ -54,11 +55,11 @@ public class TestAnchorIndexingFilter ex
       filter.filter(doc, parse, new Text("http://nutch.apache.org/index.html"), new CrawlDatum(), inlinks);
     } catch(Exception e){
       e.printStackTrace();
-      fail(e.getMessage());
+      Assert.fail(e.getMessage());
     }
-    assertNotNull(doc);
-    assertTrue("test if there is an anchor at all", doc.getFieldNames().contains("anchor"));
-    assertEquals("test dedup, we expect 2", 2, doc.getField("anchor").getValues().size());
+    Assert.assertNotNull(doc);
+    Assert.assertTrue("test if there is an anchor at all", doc.getFieldNames().contains("anchor"));
+    Assert.assertEquals("test dedup, we expect 2", 2, doc.getField("anchor").getValues().size());
   }
 
 }

Modified: nutch/trunk/src/plugin/index-basic/src/test/org/apache/nutch/indexer/basic/TestBasicIndexingFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/index-basic/src/test/org/apache/nutch/indexer/basic/TestBasicIndexingFilter.java?rev=1582928&r1=1582927&r2=1582928&view=diff
==============================================================================
--- nutch/trunk/src/plugin/index-basic/src/test/org/apache/nutch/indexer/basic/TestBasicIndexingFilter.java (original)
+++ nutch/trunk/src/plugin/index-basic/src/test/org/apache/nutch/indexer/basic/TestBasicIndexingFilter.java Sat Mar 29 00:54:40 2014
@@ -28,9 +28,10 @@ import org.apache.nutch.parse.ParseData;
 import org.apache.nutch.parse.ParseImpl;
 import org.apache.nutch.parse.ParseStatus;
 import org.apache.nutch.util.NutchConfiguration;
+import org.junit.Assert;
+import org.junit.Test;
 
 import java.util.Date;
-import junit.framework.TestCase;
 
 /**
  * JUnit test case which tests 
@@ -43,8 +44,9 @@ import junit.framework.TestCase;
  *
  */
 
-public class TestBasicIndexingFilter extends TestCase {
+public class TestBasicIndexingFilter {
 
+  @Test
   public void testBasicIndexingFilter() throws Exception { 
     Configuration conf = NutchConfiguration.create();
     conf.setInt("indexer.max.title.length", 10);
@@ -53,7 +55,7 @@ public class TestBasicIndexingFilter ext
 
     BasicIndexingFilter filter = new BasicIndexingFilter();
     filter.setConf(conf);
-    assertNotNull(filter);
+    Assert.assertNotNull(filter);
 
     NutchDocument doc = new NutchDocument();
 
@@ -73,15 +75,15 @@ public class TestBasicIndexingFilter ext
       filter.filter(doc, parse, new Text("http://nutch.apache.org/index.html"), crawlDatum, inlinks);
     } catch(Exception e){
       e.printStackTrace();
-      fail(e.getMessage());
+      Assert.fail(e.getMessage());
     }
-    assertNotNull(doc);
-    assertEquals("test title, expect \"The Foo Pa\"", "The Foo Pa", doc.getField("title").getValues().get(0));
-    assertEquals("test domain, expect \"apache.org\"", "apache.org", doc.getField("domain").getValues().get(0));
-    assertEquals("test host, expect \"nutch.apache.org\"", "nutch.apache.org", doc.getField("host").getValues().get(0));
-    assertEquals("test url, expect \"http://nutch.apache.org/index.html\"", "http://nutch.apache.org/index.html", 
+    Assert.assertNotNull(doc);
+    Assert.assertEquals("test title, expect \"The Foo Pa\"", "The Foo Pa", doc.getField("title").getValues().get(0));
+    Assert.assertEquals("test domain, expect \"apache.org\"", "apache.org", doc.getField("domain").getValues().get(0));
+    Assert.assertEquals("test host, expect \"nutch.apache.org\"", "nutch.apache.org", doc.getField("host").getValues().get(0));
+    Assert.assertEquals("test url, expect \"http://nutch.apache.org/index.html\"", "http://nutch.apache.org/index.html", 
       doc.getField("url").getValues().get(0));
-    assertEquals("test content", "this is a sample foo", doc.getField("content").getValues().get(0));
-    assertEquals("test fetch time", new Date(100L), (Date)doc.getField("tstamp").getValues().get(0));
+    Assert.assertEquals("test content", "this is a sample foo", doc.getField("content").getValues().get(0));
+    Assert.assertEquals("test fetch time", new Date(100L), (Date)doc.getField("tstamp").getValues().get(0));
   }
 }

Modified: nutch/trunk/src/plugin/index-more/src/test/org/apache/nutch/indexer/more/TestMoreIndexingFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/index-more/src/test/org/apache/nutch/indexer/more/TestMoreIndexingFilter.java?rev=1582928&r1=1582927&r2=1582928&view=diff
==============================================================================
--- nutch/trunk/src/plugin/index-more/src/test/org/apache/nutch/indexer/more/TestMoreIndexingFilter.java (original)
+++ nutch/trunk/src/plugin/index-more/src/test/org/apache/nutch/indexer/more/TestMoreIndexingFilter.java Sat Mar 29 00:54:40 2014
@@ -29,17 +29,19 @@ import org.apache.nutch.parse.ParseData;
 import org.apache.nutch.parse.ParseImpl;
 import org.apache.nutch.parse.ParseStatus;
 import org.apache.nutch.util.NutchConfiguration;
+import org.junit.Assert;
+import org.junit.Test;
 
-import junit.framework.TestCase;
-
-public class TestMoreIndexingFilter extends TestCase {
+public class TestMoreIndexingFilter {
 
+  @Test
   public void testContentType() throws IndexingException {
     Configuration conf = NutchConfiguration.create();
     assertContentType(conf, "text/html", "text/html");
     assertContentType(conf, "text/html; charset=UTF-8", "text/html");
   }
   
+  @Test
   public void testGetParts() {
     String[] parts = MoreIndexingFilter.getParts("text/html");
     assertParts(parts, 2, "text", "html");
@@ -48,12 +50,13 @@ public class TestMoreIndexingFilter exte
   /**
    * @since NUTCH-901
    */
+  @Test
   public void testNoParts(){
     Configuration conf = NutchConfiguration.create();
     conf.setBoolean("moreIndexingFilter.indexMimeTypeParts", false);
     MoreIndexingFilter filter = new MoreIndexingFilter();
     filter.setConf(conf);
-    assertNotNull(filter);
+    Assert.assertNotNull(filter);
     NutchDocument doc = new NutchDocument();
     ParseImpl parse = new ParseImpl("foo bar", new ParseData());
     
@@ -62,14 +65,15 @@ public class TestMoreIndexingFilter exte
     }
     catch(Exception e){
         e.printStackTrace();
-        fail(e.getMessage());
+        Assert.fail(e.getMessage());
     }
-    assertNotNull(doc);
-    assertTrue(doc.getFieldNames().contains("type"));
-    assertEquals(1, doc.getField("type").getValues().size());
-    assertEquals("text/html", doc.getFieldValue("type"));    
+    Assert.assertNotNull(doc);
+    Assert.assertTrue(doc.getFieldNames().contains("type"));
+    Assert.assertEquals(1, doc.getField("type").getValues().size());
+    Assert.assertEquals("text/html", doc.getFieldValue("type"));    
   }
 
+  @Test
   public void testContentDispositionTitle() throws IndexingException {
     Configuration conf = NutchConfiguration.create();
 
@@ -82,13 +86,13 @@ public class TestMoreIndexingFilter exte
       new ParseStatus(), "title", new Outlink[0], metadata)), new Text(
         "http://www.example.com/"), new CrawlDatum(), new Inlinks());
 
-    assertEquals("content-disposition not detected", "filename.ext", doc.getFieldValue("title"));
+    Assert.assertEquals("content-disposition not detected", "filename.ext", doc.getFieldValue("title"));
   }
 
   private void assertParts(String[] parts, int count, String... expected) {
-    assertEquals(count, parts.length);
+    Assert.assertEquals(count, parts.length);
     for (int i = 0; i < expected.length; i++) {
-      assertEquals(expected[i], parts[i]);
+      Assert.assertEquals(expected[i], parts[i]);
     }
   }
   
@@ -100,6 +104,6 @@ public class TestMoreIndexingFilter exte
     NutchDocument doc = filter.filter(new NutchDocument(), new ParseImpl("text", new ParseData(
         new ParseStatus(), "title", new Outlink[0], metadata)), new Text(
         "http://www.example.com/"), new CrawlDatum(), new Inlinks());
-    assertEquals("mime type not detected", expected, doc.getFieldValue("type"));
+    Assert.assertEquals("mime type not detected", expected, doc.getFieldValue("type"));
   }
 }

Modified: nutch/trunk/src/plugin/index-static/src/test/org/apache/nutch/indexer/staticfield/TestStaticFieldIndexerTest.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/index-static/src/test/org/apache/nutch/indexer/staticfield/TestStaticFieldIndexerTest.java?rev=1582928&r1=1582927&r2=1582928&view=diff
==============================================================================
--- nutch/trunk/src/plugin/index-static/src/test/org/apache/nutch/indexer/staticfield/TestStaticFieldIndexerTest.java (original)
+++ nutch/trunk/src/plugin/index-static/src/test/org/apache/nutch/indexer/staticfield/TestStaticFieldIndexerTest.java Sat Mar 29 00:54:40 2014
@@ -23,8 +23,9 @@ import org.apache.nutch.crawl.Inlinks;
 import org.apache.nutch.indexer.NutchDocument;
 import org.apache.nutch.parse.ParseImpl;
 import org.apache.nutch.util.NutchConfiguration;
-
-import junit.framework.TestCase;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
 
 /**
  * JUnit test case which tests 
@@ -36,7 +37,7 @@ import junit.framework.TestCase;
  * @author tejasp
  */
 
-public class TestStaticFieldIndexerTest extends TestCase {
+public class TestStaticFieldIndexerTest {
 
   Configuration conf;
 
@@ -46,7 +47,8 @@ public class TestStaticFieldIndexerTest 
   Text url;
   StaticFieldIndexer filter;
 
-  protected void setUp() throws Exception {
+  @Before
+  public void setUp() throws Exception {
     conf = NutchConfiguration.create();
     parse = new ParseImpl();
     url = new Text("http://nutch.apache.org/index.html");
@@ -59,9 +61,10 @@ public class TestStaticFieldIndexerTest 
    * Test that empty {@code index.static} does not add anything to the document
    * @throws Exception 
    */
+  @Test
   public void testEmptyIndexStatic() throws Exception {
 
-    assertNotNull(filter);
+    Assert.assertNotNull(filter);
     filter.setConf(conf);
 
     NutchDocument doc = new NutchDocument();
@@ -70,22 +73,23 @@ public class TestStaticFieldIndexerTest 
       filter.filter(doc, parse, url, crawlDatum, inlinks);
     } catch (Exception e) {
       e.printStackTrace();
-      fail(e.getMessage());
+      Assert.fail(e.getMessage());
     }
 
-    assertNotNull(doc);
-    assertTrue("tests if no field is set for empty index.static", doc.getFieldNames().isEmpty());
+    Assert.assertNotNull(doc);
+    Assert.assertTrue("tests if no field is set for empty index.static", doc.getFieldNames().isEmpty());
   }
 
   /**
    * Test that valid field:value pairs are added to the document
    * @throws Exception 
    */
+  @Test
   public void testNormalScenario() throws Exception {
 
     conf.set("index.static",
         "field1:val1, field2    :      val2 val3     , field3, field4 :val4 , ");
-    assertNotNull(filter);
+    Assert.assertNotNull(filter);
     filter.setConf(conf);
 
     NutchDocument doc = new NutchDocument();
@@ -94,17 +98,17 @@ public class TestStaticFieldIndexerTest 
       filter.filter(doc, parse, url, crawlDatum, inlinks);
     } catch (Exception e) {
       e.printStackTrace();
-      fail(e.getMessage());
+      Assert.fail(e.getMessage());
     }
 
-    assertNotNull(doc);
-    assertFalse("test if doc is not empty", doc.getFieldNames().isEmpty());
-    assertEquals("test if doc has 3 fields", 3, doc.getFieldNames().size());
-    assertTrue("test if doc has field1", doc.getField("field1").getValues()
+    Assert.assertNotNull(doc);
+    Assert.assertFalse("test if doc is not empty", doc.getFieldNames().isEmpty());
+    Assert.assertEquals("test if doc has 3 fields", 3, doc.getFieldNames().size());
+    Assert.assertTrue("test if doc has field1", doc.getField("field1").getValues()
         .contains("val1"));
-    assertTrue("test if doc has field2", doc.getField("field2").getValues()
+    Assert.assertTrue("test if doc has field2", doc.getField("field2").getValues()
         .contains("val2"));
-    assertTrue("test if doc has field4", doc.getField("field4").getValues()
+    Assert.assertTrue("test if doc has field4", doc.getField("field4").getValues()
         .contains("val4"));
   }
 }

Modified: nutch/trunk/src/plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/TestHTMLLanguageParser.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/TestHTMLLanguageParser.java?rev=1582928&r1=1582927&r2=1582928&view=diff
==============================================================================
--- nutch/trunk/src/plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/TestHTMLLanguageParser.java (original)
+++ nutch/trunk/src/plugin/language-identifier/src/test/org/apache/nutch/analysis/lang/TestHTMLLanguageParser.java Sat Mar 29 00:54:40 2014
@@ -16,13 +16,10 @@
  */
 package org.apache.nutch.analysis.lang;
 
-
-
-// JUnit imports
 import java.io.BufferedReader;
 import java.io.InputStreamReader;
 
-import junit.framework.TestCase;
+
 
 // Nutch imports
 import org.apache.nutch.metadata.Metadata;
@@ -31,9 +28,11 @@ import org.apache.nutch.parse.ParseUtil;
 import org.apache.nutch.protocol.Content;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.tika.language.LanguageIdentifier;
+import org.junit.Assert;
+import org.junit.Test;
 
 
-public class TestHTMLLanguageParser extends TestCase {
+public class TestHTMLLanguageParser {
 
   private static String URL = "http://foo.bar/";
 
@@ -50,6 +49,7 @@ public class TestHTMLLanguageParser exte
   /**
    * Test parsing of language identifiers from html 
    **/
+  @Test
   public void testMetaHTMLParsing() {
 
     try {
@@ -58,16 +58,17 @@ public class TestHTMLLanguageParser exte
       for (int t = 0; t < docs.length; t++) {
         Content content = getContent(docs[t]);
         Parse parse = parser.parse(content).get(content.getUrl());
-        assertEquals(metalanguages[t], (String) parse.getData().getParseMeta().get(Metadata.LANGUAGE));
+        Assert.assertEquals(metalanguages[t], (String) parse.getData().getParseMeta().get(Metadata.LANGUAGE));
       }
     } catch (Exception e) {
       e.printStackTrace(System.out);
-      fail(e.toString());
+      Assert.fail(e.toString());
     }
 
   }
 
   /** Test of <code>LanguageParser.parseLanguage(String)</code> method. */
+  @Test
   public void testParseLanguage() {
     String tests[][] = {
       { "(SCHEME=ISO.639-1) sv", "sv" },
@@ -117,7 +118,7 @@ public class TestHTMLLanguageParser exte
     };
     
     for (int i=0; i<44; i++) {
-      assertEquals(tests[i][1], HTMLLanguageParser.LanguageParser.parseLanguage(tests[i][0]));
+      Assert.assertEquals(tests[i][1], HTMLLanguageParser.LanguageParser.parseLanguage(tests[i][0]));
     }
   }
   
@@ -129,6 +130,7 @@ public class TestHTMLLanguageParser exte
   }
   
 
+  @Test
   public void testLanguageIndentifier() {
     try {
       long total = 0;
@@ -150,7 +152,7 @@ public class TestHTMLLanguageParser exte
             if (testLine.length() > 256) {
               identifier = new LanguageIdentifier(testLine);
               lang = identifier.getLanguage();
-              assertEquals(tokens[1], lang);
+              Assert.assertEquals(tokens[1], lang);
             }
           }
           testFile.close();
@@ -162,14 +164,14 @@ public class TestHTMLLanguageParser exte
           lang = identifier.getLanguage();
           System.out.println(lang);
           total += System.currentTimeMillis() - start;
-          assertEquals(tokens[1], lang);
+          Assert.assertEquals(tokens[1], lang);
         }
       }
       in.close();
       System.out.println("Total Time=" + total);
     } catch (Exception e) {
       e.printStackTrace();
-      fail(e.toString());
+      Assert.fail(e.toString());
     }
   }
 

Modified: nutch/trunk/src/plugin/lib-http/src/test/org/apache/nutch/protocol/http/api/TestRobotRulesParser.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/lib-http/src/test/org/apache/nutch/protocol/http/api/TestRobotRulesParser.java?rev=1582928&r1=1582927&r2=1582928&view=diff
==============================================================================
--- nutch/trunk/src/plugin/lib-http/src/test/org/apache/nutch/protocol/http/api/TestRobotRulesParser.java (original)
+++ nutch/trunk/src/plugin/lib-http/src/test/org/apache/nutch/protocol/http/api/TestRobotRulesParser.java Sat Mar 29 00:54:40 2014
@@ -17,8 +17,10 @@
 
 package org.apache.nutch.protocol.http.api;
 
+import org.junit.Assert;
+import org.junit.Test;
+
 import crawlercommons.robots.BaseRobotRules;
-import junit.framework.TestCase;
 
 /**
  * JUnit test case which tests
@@ -26,14 +28,14 @@ import junit.framework.TestCase;
  * 2. that crawl delay is extracted correctly from the robots file
  *
  */
-public class TestRobotRulesParser extends TestCase {
+public class TestRobotRulesParser {
 
   private static final String CONTENT_TYPE = "text/plain";
   private static final String SINGLE_AGENT = "Agent1";
   private static final String MULTIPLE_AGENTS = "Agent2, Agent1";
   private static final String UNKNOWN_AGENT = "AgentABC";
   private static final String CR = "\r";
-  
+
   private static final String ROBOTS_STRING = 
       "User-Agent: Agent1 #foo" + CR 
       + "Disallow: /a" + CR 
@@ -50,7 +52,7 @@ public class TestRobotRulesParser extend
       + "" + CR 
       + "User-Agent: *" + CR 
       + "Disallow: /foo/bar/" + CR;   // no crawl delay for other agents
-  
+
   private static final String[] TEST_PATHS = new String[] {
     "http://example.com/a",
     "http://example.com/a/bloh/foo.html",
@@ -72,20 +74,22 @@ public class TestRobotRulesParser extend
   private HttpRobotRulesParser parser;
   private BaseRobotRules rules;
 
+  public TestRobotRulesParser () {
+  }
   public TestRobotRulesParser(String name) {
-    super(name);
     parser = new HttpRobotRulesParser();
   }
 
   /**
-  * Test that the robots rules are interpreted correctly by the robots rules parser. 
-  */
+   * Test that the robots rules are interpreted correctly by the robots rules parser. 
+   */
+  @Test
   public void testRobotsAgent() {
     rules = parser.parseRules("testRobotsAgent", ROBOTS_STRING.getBytes(), CONTENT_TYPE, SINGLE_AGENT);
 
     for(int counter = 0; counter < TEST_PATHS.length; counter++) {
-      assertTrue("testing on agent (" + SINGLE_AGENT + "), and " 
-              + "path " + TEST_PATHS[counter] 
+      Assert.assertTrue("testing on agent (" + SINGLE_AGENT + "), and " 
+          + "path " + TEST_PATHS[counter] 
               + " got " + rules.isAllowed(TEST_PATHS[counter]),
               rules.isAllowed(TEST_PATHS[counter]) == RESULTS[counter]);
     }
@@ -93,24 +97,25 @@ public class TestRobotRulesParser extend
     rules = parser.parseRules("testRobotsAgent", ROBOTS_STRING.getBytes(), CONTENT_TYPE, MULTIPLE_AGENTS);
 
     for(int counter = 0; counter < TEST_PATHS.length; counter++) {
-      assertTrue("testing on agents (" + MULTIPLE_AGENTS + "), and " 
-              + "path " + TEST_PATHS[counter] 
+      Assert.assertTrue("testing on agents (" + MULTIPLE_AGENTS + "), and " 
+          + "path " + TEST_PATHS[counter] 
               + " got " + rules.isAllowed(TEST_PATHS[counter]),
               rules.isAllowed(TEST_PATHS[counter]) == RESULTS[counter]);
     }
   }
 
   /**
-  * Test that the crawl delay is extracted from the robots file for respective agent. 
-  * If its not specified for a given agent, default value must be returned.
-  */
+   * Test that the crawl delay is extracted from the robots file for respective agent. 
+   * If its not specified for a given agent, default value must be returned.
+   */
+  @Test
   public void testCrawlDelay() {
     // for SINGLE_AGENT, the crawl delay of 10 sec ie. 10000 msec must be returned by the parser
     rules = parser.parseRules("testCrawlDelay", ROBOTS_STRING.getBytes(), CONTENT_TYPE, SINGLE_AGENT);
-    assertTrue("testing crawl delay for agent "+ SINGLE_AGENT +" : ", (rules.getCrawlDelay() == 10000));
-    
+    Assert.assertTrue("testing crawl delay for agent "+ SINGLE_AGENT +" : ", (rules.getCrawlDelay() == 10000));
+
     // for UNKNOWN_AGENT, the default crawl delay must be returned.
     rules = parser.parseRules("testCrawlDelay", ROBOTS_STRING.getBytes(), CONTENT_TYPE, UNKNOWN_AGENT);
-    assertTrue("testing crawl delay for agent "+ UNKNOWN_AGENT +" : ", (rules.getCrawlDelay() == Long.MIN_VALUE));
+    Assert.assertTrue("testing crawl delay for agent "+ UNKNOWN_AGENT +" : ", (rules.getCrawlDelay() == Long.MIN_VALUE));
   }
 }

Modified: nutch/trunk/src/plugin/lib-regex-filter/src/test/org/apache/nutch/urlfilter/api/RegexURLFilterBaseTest.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/lib-regex-filter/src/test/org/apache/nutch/urlfilter/api/RegexURLFilterBaseTest.java?rev=1582928&r1=1582927&r2=1582928&view=diff
==============================================================================
--- nutch/trunk/src/plugin/lib-regex-filter/src/test/org/apache/nutch/urlfilter/api/RegexURLFilterBaseTest.java (original)
+++ nutch/trunk/src/plugin/lib-regex-filter/src/test/org/apache/nutch/urlfilter/api/RegexURLFilterBaseTest.java Sat Mar 29 00:54:40 2014
@@ -24,10 +24,7 @@ import java.io.Reader;
 import java.util.ArrayList;
 import java.util.List;
 
-// JUnit imports
-import junit.framework.TestCase;
-
-// Commons Logging imports
+import org.junit.Assert;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -40,30 +37,26 @@ import org.apache.nutch.net.URLFilter;
  *
  * @author J&eacute;r&ocirc;me Charron
  */
-public abstract class RegexURLFilterBaseTest extends TestCase {
-  
+public abstract class RegexURLFilterBaseTest {
+
   /** My logger */
   protected static final Logger LOG = LoggerFactory.getLogger(RegexURLFilterBaseTest.class);  
 
   private final static String SEPARATOR = System.getProperty("file.separator");  
   private final static String SAMPLES = System.getProperty("test.data", ".");
-  
-  public RegexURLFilterBaseTest(String testName) {
-    super(testName);
-  }
-  
+
   protected abstract URLFilter getURLFilter(Reader rules);
 
   protected void bench(int loops, String file) {
     try {
       bench(loops,
-            new FileReader(SAMPLES + SEPARATOR + file + ".rules"),
-            new FileReader(SAMPLES + SEPARATOR + file + ".urls"));
+          new FileReader(SAMPLES + SEPARATOR + file + ".rules"),
+          new FileReader(SAMPLES + SEPARATOR + file + ".urls"));
     } catch (Exception e) {
-      fail(e.toString());
+      Assert.fail(e.toString());
     }
   }
-  
+
   protected void bench(int loops, Reader rules, Reader urls) {
     long start = System.currentTimeMillis();
     try {
@@ -73,40 +66,40 @@ public abstract class RegexURLFilterBase
         test(filter, expected);
       }
     } catch (Exception e) {
-      fail(e.toString());
+      Assert.fail(e.toString());
     }
     LOG.info("bench time (" + loops + ") " +
-             (System.currentTimeMillis()-start) + "ms");
+        (System.currentTimeMillis()-start) + "ms");
   }
-  
+
   protected void test(String file) {
     try {
       test(new FileReader(SAMPLES + SEPARATOR + file + ".rules"),
-           new FileReader(SAMPLES + SEPARATOR + file + ".urls"));
+          new FileReader(SAMPLES + SEPARATOR + file + ".urls"));
     } catch (Exception e) {
-      fail(e.toString());
+      Assert.fail(e.toString());
     }
   }
-  
+
   protected void test(Reader rules, Reader urls) {
     try {
       test(getURLFilter(rules), readURLFile(urls));
     } catch (Exception e) {
-      fail(e.toString());
+      Assert.fail(e.toString());
     }
   }
-  
+
   protected void test(URLFilter filter, FilteredURL[] expected) {
     for (int i=0; i<expected.length; i++) {
       String result = filter.filter(expected[i].url);
       if (result != null) {
-        assertTrue(expected[i].url, expected[i].sign);
+        Assert.assertTrue(expected[i].url, expected[i].sign);
       } else {
-        assertFalse(expected[i].url, expected[i].sign);
+        Assert.assertFalse(expected[i].url, expected[i].sign);
       }
     }
   }
-  
+
   private static FilteredURL[] readURLFile(Reader reader) throws IOException {
     BufferedReader in = new BufferedReader(reader);
     List<FilteredURL> list = new ArrayList<FilteredURL>();
@@ -118,9 +111,9 @@ public abstract class RegexURLFilterBase
     }
     return (FilteredURL[]) list.toArray(new FilteredURL[list.size()]);
   }
-    
+
   private static class FilteredURL {
-  
+
     boolean sign;
     String url;
 
@@ -138,5 +131,5 @@ public abstract class RegexURLFilterBase
       url = line.substring(1);
     }
   }
-  
+
 }

Modified: nutch/trunk/src/plugin/parse-ext/src/test/org/apache/nutch/parse/ext/TestExtParser.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-ext/src/test/org/apache/nutch/parse/ext/TestExtParser.java?rev=1582928&r1=1582927&r2=1582928&view=diff
==============================================================================
--- nutch/trunk/src/plugin/parse-ext/src/test/org/apache/nutch/parse/ext/TestExtParser.java (original)
+++ nutch/trunk/src/plugin/parse-ext/src/test/org/apache/nutch/parse/ext/TestExtParser.java Sat Mar 29 00:54:40 2014
@@ -21,18 +21,17 @@ import org.apache.nutch.protocol.Protoco
 import org.apache.nutch.protocol.Protocol;
 import org.apache.nutch.protocol.Content;
 import org.apache.nutch.protocol.ProtocolException;
-
 import org.apache.nutch.parse.Parse;
-import org.apache.nutch.parse.ParseImpl;
 import org.apache.nutch.parse.ParseUtil;
 import org.apache.nutch.parse.ParseException;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.nutch.util.NutchConfiguration;
-
 import org.apache.hadoop.io.Text;
 import org.apache.nutch.crawl.CrawlDatum;
-
-import junit.framework.TestCase;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
 
 import java.io.File;
 import java.io.FileOutputStream;
@@ -49,7 +48,7 @@ import java.io.IOException;
  *
  * @author John Xing
  */
-public class TestExtParser extends TestCase {
+public class TestExtParser {
   private File tempFile = null;
   private String urlString = null;
   private Content content = null;
@@ -59,10 +58,7 @@ public class TestExtParser extends TestC
   // echo -n "nutch rocks nutch rocks nutch rocks" | md5sum
   private String expectedMD5sum = "df46711a1a48caafc98b1c3b83aa1526";
 
-  public TestExtParser(String name) { 
-    super(name); 
-  }
-
+  @Before
   protected void setUp() throws ProtocolException, IOException {
     // prepare a temp file with expectedText as its content
     // This system property is defined in ./src/plugin/build-plugin.xml
@@ -76,7 +72,7 @@ public class TestExtParser extends TestC
       // otherwise in java.io.tmpdir
       tempFile = File.createTempFile("nutch.test.plugin.ExtParser.",".txt");
     }
-    urlString = tempFile.toURL().toString();
+    urlString = tempFile.toURI().toURL().toString();
 
     FileOutputStream fos = new FileOutputStream(tempFile);
     fos.write(expectedText.getBytes());
@@ -88,6 +84,7 @@ public class TestExtParser extends TestC
     protocol = null;
   }
 
+  @After
   protected void tearDown() {
     // clean content
     content = null;
@@ -97,6 +94,7 @@ public class TestExtParser extends TestC
     //  tempFile.delete();
   }
 
+  @Test
   public void testIt() throws ParseException {
     String contentType;
 
@@ -114,13 +112,13 @@ public class TestExtParser extends TestC
       contentType = "application/vnd.nutch.example.cat";
       content.setContentType(contentType);
       parse = new ParseUtil(conf).parseByExtensionId("parse-ext", content).get(content.getUrl());
-      assertEquals(expectedText,parse.getText());
+      Assert.assertEquals(expectedText,parse.getText());
 
       // check external parser that does 'md5sum'
       contentType = "application/vnd.nutch.example.md5sum";
       content.setContentType(contentType);
       parse = new ParseUtil(conf).parseByExtensionId("parse-ext", content).get(content.getUrl());
-      assertTrue(parse.getText().startsWith(expectedMD5sum));
+      Assert.assertTrue(parse.getText().startsWith(expectedMD5sum));
     }
   }
 

Modified: nutch/trunk/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestDOMContentUtils.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestDOMContentUtils.java?rev=1582928&r1=1582927&r2=1582928&view=diff
==============================================================================
--- nutch/trunk/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestDOMContentUtils.java (original)
+++ nutch/trunk/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestDOMContentUtils.java Sat Mar 29 00:54:40 2014
@@ -17,8 +17,6 @@
 
 package org.apache.nutch.parse.html;
 
-import junit.framework.TestCase;
-
 import org.apache.nutch.parse.Outlink;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.nutch.util.NutchConfiguration;
@@ -30,6 +28,9 @@ import java.util.ArrayList;
 import java.util.StringTokenizer;
 
 import org.cyberneko.html.parsers.*;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
 import org.xml.sax.*;
 import org.w3c.dom.*;
 import org.apache.html.dom.*;
@@ -37,125 +38,125 @@ import org.apache.html.dom.*;
 /** 
  * Unit tests for DOMContentUtils.
  */
-public class TestDOMContentUtils extends TestCase {
+public class TestDOMContentUtils {
 
   private static final String[] testPages= { 
     new String("<html><head><title> title </title><script> script </script>"
-               + "</head><body> body <a href=\"http://www.nutch.org\">"
-               + " anchor </a><!--comment-->"
-               + "</body></html>"),
-    new String("<html><head><title> title </title><script> script </script>"
-               + "</head><body> body <a href=\"/\">"
-               + " home </a><!--comment-->"
-               + "<style> style </style>"
-               + " <a href=\"bot.html\">"
-               + " bots </a>"
-               + "</body></html>"),
-    new String("<html><head><title> </title>"
-               + "</head><body> "
-               + "<a href=\"/\"> separate this "
-               + "<a href=\"ok\"> from this"
-               + "</a></a>"
-               + "</body></html>"),
-    // this one relies on certain neko fixup behavior, possibly
-    // distributing the anchors into the LI's-but not the other
-    // anchors (outside of them, instead)!  So you get a tree that
-    // looks like:
-    // ... <li> <a href=/> home </a> </li>
-    //     <li> <a href=/> <a href="1"> 1 </a> </a> </li>
-    //     <li> <a href=/> <a href="1"> <a href="2"> 2 </a> </a> </a> </li>
-    new String("<html><head><title> my title </title>"
-               + "</head><body> body "
-               + "<ul>"
-               + "<li> <a href=\"/\"> home"
-               + "<li> <a href=\"1\"> 1"
-               + "<li> <a href=\"2\"> 2"
-               + "</ul>"
-               + "</body></html>"),
-    // test frameset link extraction. The invalid frame in the middle will be
-    // fixed to a third standalone frame.
-    new String("<html><head><title> my title </title>"
-               + "</head><frameset rows=\"20,*\"> "
-               + "<frame src=\"top.html\">"
-               + "</frame>"
-               + "<frameset cols=\"20,*\">"
-               + "<frame src=\"left.html\">"
-               + "<frame src=\"invalid.html\"/>"
-               + "</frame>"
-               + "<frame src=\"right.html\">"
-               + "</frame>"
-               + "</frameset>"
-               + "</frameset>"
-               + "</body></html>"),
-    // test <area> and <iframe> link extraction + url normalization
-    new String("<html><head><title> my title </title>"
-               + "</head><body>"
-               + "<img src=\"logo.gif\" usemap=\"#green\" border=\"0\">"
-			   + "<map name=\"green\">"
-			   + "<area shape=\"polygon\" coords=\"19,44,45,11,87\" href=\"../index.html\">"
-			   + "<area shape=\"rect\" coords=\"128,132,241,179\" href=\"#bottom\">"
-			   + "<area shape=\"circle\" coords=\"68,211,35\" href=\"../bot.html\">"
-			   + "</map>"
-               + "<a name=\"bottom\"/><h1> the bottom </h1> "
-               + "<iframe src=\"../docs/index.html\"/>"
-               + "</body></html>"),
-    // test whitespace processing for plain text extraction
-    new String("<html><head>\n <title> my\t\n  title\r\n </title>\n"
-               + " </head>\n"
-               + " <body>\n"
-               + "    <h1> Whitespace\ttest  </h1> \n"
-               + "\t<a href=\"../index.html\">\n  \twhitespace  test\r\n\t</a>  \t\n"
-               + "    <p> This is<span> a whitespace<span></span> test</span>. Newlines\n"
-               + "should appear as space too.</p><p>Tabs\tare spaces too.\n</p>"
-               + "    This\t<b>is a</b> break -&gt;<br>and the line after<i> break</i>.<br>\n"
-               + "<table>"
-               + "    <tr><td>one</td><td>two</td><td>three</td></tr>\n"
-               + "    <tr><td>space here </td><td> space there</td><td>no space</td></tr>"
-               + "\t<tr><td>one\r\ntwo</td><td>two\tthree</td><td>three\r\tfour</td></tr>\n"
-               + "</table>put some text here<Br>and there."
-               + "<h2>End\tthis\rmadness\n!</h2>\r\n"
-               + "         .        .        .         ."
-               + "</body>  </html>"),
-
-    // test that <a rel=nofollow> links are not returned
-    new String("<html><head></head><body>"
-               + "<a href=\"http://www.nutch.org\" rel=\"nofollow\"> ignore </a>"
-               + "<a rel=\"nofollow\" href=\"http://www.nutch.org\"> ignore </a>"
-               + "</body></html>"),
-    // test that POST form actions are skipped
-    new String("<html><head></head><body>"
-            + "<form method='POST' action='/search.jsp'><input type=text>"
-            + "<input type=submit><p>test1</p></form>"
-            + "<form method='GET' action='/dummy.jsp'><input type=text>"
-            + "<input type=submit><p>test2</p></form></body></html>"),
-    // test that all form actions are skipped
-    new String("<html><head></head><body>"
-            + "<form method='POST' action='/search.jsp'><input type=text>"
-            + "<input type=submit><p>test1</p></form>"
-            + "<form method='GET' action='/dummy.jsp'><input type=text>"
-            + "<input type=submit><p>test2</p></form></body></html>"),
-    new String("<html><head><title> title </title>"
-      + "</head><body>"
-      + "<a href=\";x\">anchor1</a>"
-      + "<a href=\"g;x\">anchor2</a>"
-      + "<a href=\"g;x?y#s\">anchor3</a>"
-      + "</body></html>"),  
-    new String("<html><head><title> title </title>"
-        + "</head><body>"
-        + "<a href=\"g\">anchor1</a>"
-        + "<a href=\"g?y#s\">anchor2</a>"
-        + "<a href=\"?y=1\">anchor3</a>"
-        + "<a href=\"?y=1#s\">anchor4</a>"
-        + "<a href=\"?y=1;somethingelse\">anchor5</a>"
-        + "</body></html>"), 
-    new String("<html><head><title> title </title>"
-        + "</head><body>"
-        + "<a href=\"g\"><!--no anchor--></a>"
-        + "<a href=\"g1\"> <!--whitespace-->  </a>"
-        + "<a href=\"g2\">  <img src=test.gif alt='bla bla'> </a>"
-        + "</body></html>"), 
+        + "</head><body> body <a href=\"http://www.nutch.org\">"
+        + " anchor </a><!--comment-->"
+        + "</body></html>"),
+        new String("<html><head><title> title </title><script> script </script>"
+            + "</head><body> body <a href=\"/\">"
+            + " home </a><!--comment-->"
+            + "<style> style </style>"
+            + " <a href=\"bot.html\">"
+            + " bots </a>"
+            + "</body></html>"),
+            new String("<html><head><title> </title>"
+                + "</head><body> "
+                + "<a href=\"/\"> separate this "
+                + "<a href=\"ok\"> from this"
+                + "</a></a>"
+                + "</body></html>"),
+                // this one relies on certain neko fixup behavior, possibly
+                // distributing the anchors into the LI's-but not the other
+                // anchors (outside of them, instead)!  So you get a tree that
+                // looks like:
+                // ... <li> <a href=/> home </a> </li>
+                //     <li> <a href=/> <a href="1"> 1 </a> </a> </li>
+                //     <li> <a href=/> <a href="1"> <a href="2"> 2 </a> </a> </a> </li>
+                new String("<html><head><title> my title </title>"
+                    + "</head><body> body "
+                    + "<ul>"
+                    + "<li> <a href=\"/\"> home"
+                    + "<li> <a href=\"1\"> 1"
+                    + "<li> <a href=\"2\"> 2"
+                    + "</ul>"
+                    + "</body></html>"),
+                    // test frameset link extraction. The invalid frame in the middle will be
+                    // fixed to a third standalone frame.
+                    new String("<html><head><title> my title </title>"
+                        + "</head><frameset rows=\"20,*\"> "
+                        + "<frame src=\"top.html\">"
+                        + "</frame>"
+                        + "<frameset cols=\"20,*\">"
+                        + "<frame src=\"left.html\">"
+                        + "<frame src=\"invalid.html\"/>"
+                        + "</frame>"
+                        + "<frame src=\"right.html\">"
+                        + "</frame>"
+                        + "</frameset>"
+                        + "</frameset>"
+                        + "</body></html>"),
+                        // test <area> and <iframe> link extraction + url normalization
+                        new String("<html><head><title> my title </title>"
+                            + "</head><body>"
+                            + "<img src=\"logo.gif\" usemap=\"#green\" border=\"0\">"
+                            + "<map name=\"green\">"
+                            + "<area shape=\"polygon\" coords=\"19,44,45,11,87\" href=\"../index.html\">"
+                            + "<area shape=\"rect\" coords=\"128,132,241,179\" href=\"#bottom\">"
+                            + "<area shape=\"circle\" coords=\"68,211,35\" href=\"../bot.html\">"
+                            + "</map>"
+                            + "<a name=\"bottom\"/><h1> the bottom </h1> "
+                            + "<iframe src=\"../docs/index.html\"/>"
+                            + "</body></html>"),
+                            // test whitespace processing for plain text extraction
+                            new String("<html><head>\n <title> my\t\n  title\r\n </title>\n"
+                                + " </head>\n"
+                                + " <body>\n"
+                                + "    <h1> Whitespace\ttest  </h1> \n"
+                                + "\t<a href=\"../index.html\">\n  \twhitespace  test\r\n\t</a>  \t\n"
+                                + "    <p> This is<span> a whitespace<span></span> test</span>. Newlines\n"
+                                + "should appear as space too.</p><p>Tabs\tare spaces too.\n</p>"
+                                + "    This\t<b>is a</b> break -&gt;<br>and the line after<i> break</i>.<br>\n"
+                                + "<table>"
+                                + "    <tr><td>one</td><td>two</td><td>three</td></tr>\n"
+                                + "    <tr><td>space here </td><td> space there</td><td>no space</td></tr>"
+                                + "\t<tr><td>one\r\ntwo</td><td>two\tthree</td><td>three\r\tfour</td></tr>\n"
+                                + "</table>put some text here<Br>and there."
+                                + "<h2>End\tthis\rmadness\n!</h2>\r\n"
+                                + "         .        .        .         ."
+                                + "</body>  </html>"),
+
+                                // test that <a rel=nofollow> links are not returned
+                                new String("<html><head></head><body>"
+                                    + "<a href=\"http://www.nutch.org\" rel=\"nofollow\"> ignore </a>"
+                                    + "<a rel=\"nofollow\" href=\"http://www.nutch.org\"> ignore </a>"
+                                    + "</body></html>"),
+                                    // test that POST form actions are skipped
+                                    new String("<html><head></head><body>"
+                                        + "<form method='POST' action='/search.jsp'><input type=text>"
+                                        + "<input type=submit><p>test1</p></form>"
+                                        + "<form method='GET' action='/dummy.jsp'><input type=text>"
+                                        + "<input type=submit><p>test2</p></form></body></html>"),
+                                        // test that all form actions are skipped
+                                        new String("<html><head></head><body>"
+                                            + "<form method='POST' action='/search.jsp'><input type=text>"
+                                            + "<input type=submit><p>test1</p></form>"
+                                            + "<form method='GET' action='/dummy.jsp'><input type=text>"
+                                            + "<input type=submit><p>test2</p></form></body></html>"),
+                                            new String("<html><head><title> title </title>"
+                                                + "</head><body>"
+                                                + "<a href=\";x\">anchor1</a>"
+                                                + "<a href=\"g;x\">anchor2</a>"
+                                                + "<a href=\"g;x?y#s\">anchor3</a>"
+                                                + "</body></html>"),  
+                                                new String("<html><head><title> title </title>"
+                                                    + "</head><body>"
+                                                    + "<a href=\"g\">anchor1</a>"
+                                                    + "<a href=\"g?y#s\">anchor2</a>"
+                                                    + "<a href=\"?y=1\">anchor3</a>"
+                                                    + "<a href=\"?y=1#s\">anchor4</a>"
+                                                    + "<a href=\"?y=1;somethingelse\">anchor5</a>"
+                                                    + "</body></html>"), 
+                                                    new String("<html><head><title> title </title>"
+                                                        + "</head><body>"
+                                                        + "<a href=\"g\"><!--no anchor--></a>"
+                                                        + "<a href=\"g1\"> <!--whitespace-->  </a>"
+                                                        + "<a href=\"g2\">  <img src=test.gif alt='bla bla'> </a>"
+                                                        + "</body></html>"), 
   };
-  
+
   private static int SKIP = 9;
 
   private static String[] testBaseHrefs= {
@@ -173,12 +174,12 @@ public class TestDOMContentUtils extends
     "http://www.nutch.org/;something",
     "http://www.nutch.org/"
   };
-    
+
   private static final DocumentFragment testDOMs[]=
-    new DocumentFragment[testPages.length];
+      new DocumentFragment[testPages.length];
 
   private static URL[] testBaseHrefURLs= 
-    new URL[testPages.length];
+      new URL[testPages.length];
 
 
   private static final String[] answerText= {
@@ -194,12 +195,12 @@ public class TestDOMContentUtils extends
         + "one two three space here space there no space "
         + "one two two three three four put some text here and there. "
         + "End this madness ! . . . .",
-    "ignore ignore",
-    "test1 test2",
-    "test1 test2",
-    "title anchor1 anchor2 anchor3",
-    "title anchor1 anchor2 anchor3 anchor4 anchor5",
-    "title"
+        "ignore ignore",
+        "test1 test2",
+        "test1 test2",
+        "title anchor1 anchor2 anchor3",
+        "title anchor1 anchor2 anchor3 anchor4 anchor5",
+        "title"
   };
 
   private static final String[] answerTitle= {
@@ -220,15 +221,12 @@ public class TestDOMContentUtils extends
 
   // note: should be in page-order
   private static Outlink[][] answerOutlinks;
-  
+
   private static Configuration conf;
   private static DOMContentUtils utils = null;
-  
-  public TestDOMContentUtils(String name) { 
-    super(name); 
-  }
 
-  private static void setup() {
+  @Before
+  public void setup() {
     conf = NutchConfiguration.create();
     conf.setBoolean("parser.html.form.use_action", true);
     utils = new DOMContentUtils(conf);
@@ -239,83 +237,83 @@ public class TestDOMContentUtils extends
           true);
     } catch (SAXException e) {}
     for (int i= 0; i < testPages.length; i++) {
-        DocumentFragment node= 
+      DocumentFragment node= 
           new HTMLDocumentImpl().createDocumentFragment();
-        try {
-          parser.parse(
+      try {
+        parser.parse(
             new InputSource( 
-              new ByteArrayInputStream(testPages[i].getBytes()) ),
-            node);
-          testBaseHrefURLs[i]= new URL(testBaseHrefs[i]);
-        } catch (Exception e) {
-          assertTrue("caught exception: " + e, false);
-        } 
+                new ByteArrayInputStream(testPages[i].getBytes()) ),
+                node);
+        testBaseHrefURLs[i]= new URL(testBaseHrefs[i]);
+      } catch (Exception e) {
+        Assert.assertTrue("caught exception: " + e, false);
+      } 
       testDOMs[i]= node;
     }
     try {
-     answerOutlinks = new Outlink[][]{ 
-         {
-           new Outlink("http://www.nutch.org", "anchor"),
-         },
-         {
-           new Outlink("http://www.nutch.org/", "home"),
-           new Outlink("http://www.nutch.org/docs/bot.html", "bots"),
-         },
-         {
-           new Outlink("http://www.nutch.org/", "separate this"),
-           new Outlink("http://www.nutch.org/docs/ok", "from this"),
-         },
-         {
-           new Outlink("http://www.nutch.org/", "home"),
-           new Outlink("http://www.nutch.org/docs/1", "1"),
-           new Outlink("http://www.nutch.org/docs/2", "2"),
-         },
-         {
-           new Outlink("http://www.nutch.org/frames/top.html", ""),
-           new Outlink("http://www.nutch.org/frames/left.html", ""),
-           new Outlink("http://www.nutch.org/frames/invalid.html", ""),
-           new Outlink("http://www.nutch.org/frames/right.html", ""),
-         },
-         {
-           new Outlink("http://www.nutch.org/maps/logo.gif", ""),
-           new Outlink("http://www.nutch.org/index.html", ""),
-           new Outlink("http://www.nutch.org/maps/#bottom", ""),
-           new Outlink("http://www.nutch.org/bot.html", ""),
-           new Outlink("http://www.nutch.org/docs/index.html", ""),
-         },
-         {
-             new Outlink("http://www.nutch.org/index.html", "whitespace test"),
-         },
-         {
-         },
-         {
-           new Outlink("http://www.nutch.org/dummy.jsp", "test2"),
-         },
-         {
-         },
-         {
-           new Outlink("http://www.nutch.org/;x", "anchor1"),
-           new Outlink("http://www.nutch.org/g;x", "anchor2"),
-           new Outlink("http://www.nutch.org/g;x?y#s", "anchor3")
-         },
-         {
-           // this is tricky - see RFC3986 section 5.4.1 example 7
-           new Outlink("http://www.nutch.org/g", "anchor1"),
-           new Outlink("http://www.nutch.org/g?y#s", "anchor2"),
-           new Outlink("http://www.nutch.org/;something?y=1", "anchor3"),
-           new Outlink("http://www.nutch.org/;something?y=1#s", "anchor4"),
-           new Outlink("http://www.nutch.org/;something?y=1;somethingelse", "anchor5")
-         },
-         {
-           new Outlink("http://www.nutch.org/g", ""),
-           new Outlink("http://www.nutch.org/g1", ""),
-           new Outlink("http://www.nutch.org/g2", "bla bla"),
-           new Outlink("http://www.nutch.org/test.gif", "bla bla"),
-         }
+      answerOutlinks = new Outlink[][]{ 
+          {
+            new Outlink("http://www.nutch.org", "anchor"),
+          },
+          {
+            new Outlink("http://www.nutch.org/", "home"),
+            new Outlink("http://www.nutch.org/docs/bot.html", "bots"),
+          },
+          {
+            new Outlink("http://www.nutch.org/", "separate this"),
+            new Outlink("http://www.nutch.org/docs/ok", "from this"),
+          },
+          {
+            new Outlink("http://www.nutch.org/", "home"),
+            new Outlink("http://www.nutch.org/docs/1", "1"),
+            new Outlink("http://www.nutch.org/docs/2", "2"),
+          },
+          {
+            new Outlink("http://www.nutch.org/frames/top.html", ""),
+            new Outlink("http://www.nutch.org/frames/left.html", ""),
+            new Outlink("http://www.nutch.org/frames/invalid.html", ""),
+            new Outlink("http://www.nutch.org/frames/right.html", ""),
+          },
+          {
+            new Outlink("http://www.nutch.org/maps/logo.gif", ""),
+            new Outlink("http://www.nutch.org/index.html", ""),
+            new Outlink("http://www.nutch.org/maps/#bottom", ""),
+            new Outlink("http://www.nutch.org/bot.html", ""),
+            new Outlink("http://www.nutch.org/docs/index.html", ""),
+          },
+          {
+            new Outlink("http://www.nutch.org/index.html", "whitespace test"),
+          },
+          {
+          },
+          {
+            new Outlink("http://www.nutch.org/dummy.jsp", "test2"),
+          },
+          {
+          },
+          {
+            new Outlink("http://www.nutch.org/;x", "anchor1"),
+            new Outlink("http://www.nutch.org/g;x", "anchor2"),
+            new Outlink("http://www.nutch.org/g;x?y#s", "anchor3")
+          },
+          {
+            // this is tricky - see RFC3986 section 5.4.1 example 7
+            new Outlink("http://www.nutch.org/g", "anchor1"),
+            new Outlink("http://www.nutch.org/g?y#s", "anchor2"),
+            new Outlink("http://www.nutch.org/;something?y=1", "anchor3"),
+            new Outlink("http://www.nutch.org/;something?y=1#s", "anchor4"),
+            new Outlink("http://www.nutch.org/;something?y=1;somethingelse", "anchor5")
+          },
+          {
+            new Outlink("http://www.nutch.org/g", ""),
+            new Outlink("http://www.nutch.org/g1", ""),
+            new Outlink("http://www.nutch.org/g2", "bla bla"),
+            new Outlink("http://www.nutch.org/test.gif", "bla bla"),
+          }
       };
-   
+
     } catch (MalformedURLException e) {
-        
+
     }
   }
 
@@ -334,6 +332,7 @@ public class TestDOMContentUtils extends
     return true;
   }
 
+  @Test
   public void testGetText() {
     if (testDOMs[0] == null) 
       setup();
@@ -341,14 +340,15 @@ public class TestDOMContentUtils extends
       StringBuffer sb= new StringBuffer();
       utils.getText(sb, testDOMs[i]);
       String text= sb.toString();
-      assertTrue("expecting text: " + answerText[i] 
-                 + System.getProperty("line.separator") 
-                 + System.getProperty("line.separator") 
-                 + "got text: "+ text, 
-                 equalsIgnoreWhitespace(answerText[i], text));
+      Assert.assertTrue("expecting text: " + answerText[i] 
+          + System.getProperty("line.separator") 
+          + System.getProperty("line.separator") 
+          + "got text: "+ text, 
+          equalsIgnoreWhitespace(answerText[i], text));
     }
   }
 
+  @Test
   public void testGetTitle() {
     if (testDOMs[0] == null) 
       setup();
@@ -356,14 +356,15 @@ public class TestDOMContentUtils extends
       StringBuffer sb= new StringBuffer();
       utils.getTitle(sb, testDOMs[i]);
       String text= sb.toString();
-      assertTrue("expecting text: " + answerText[i] 
-                 + System.getProperty("line.separator") 
-                 + System.getProperty("line.separator") 
-                 + "got text: "+ text, 
-                 equalsIgnoreWhitespace(answerTitle[i], text));
+      Assert.assertTrue("expecting text: " + answerText[i] 
+          + System.getProperty("line.separator") 
+          + System.getProperty("line.separator") 
+          + "got text: "+ text, 
+          equalsIgnoreWhitespace(answerTitle[i], text));
     }
   }
 
+  @Test
   public void testGetOutlinks() {
     if (testDOMs[0] == null) 
       setup();
@@ -398,31 +399,31 @@ public class TestDOMContentUtils extends
 
   private static final void compareOutlinks(Outlink[] o1, Outlink[] o2) {
     if (o1.length != o2.length) {
-      assertTrue("got wrong number of outlinks (expecting " + o1.length 
-                 + ", got " + o2.length + ")" 
-                 + System.getProperty("line.separator") 
-                 + "answer: " + System.getProperty("line.separator") 
-                 + outlinksString(o1) 
-                 + System.getProperty("line.separator") 
-                 + "got: " + System.getProperty("line.separator") 
-                 + outlinksString(o2)
-                 + System.getProperty("line.separator"),
-                 false
-        );
+      Assert.assertTrue("got wrong number of outlinks (expecting " + o1.length 
+          + ", got " + o2.length + ")" 
+          + System.getProperty("line.separator") 
+          + "answer: " + System.getProperty("line.separator") 
+          + outlinksString(o1) 
+          + System.getProperty("line.separator") 
+          + "got: " + System.getProperty("line.separator") 
+          + outlinksString(o2)
+          + System.getProperty("line.separator"),
+          false
+          );
     }
 
     for (int i= 0; i < o1.length; i++) {
       if (!o1[i].equals(o2[i])) {
-        assertTrue("got wrong outlinks at position " + i
-                   + System.getProperty("line.separator") 
-                   + "answer: " + System.getProperty("line.separator") 
-                   + "'" + o1[i].getToUrl() + "', anchor: '" + o1[i].getAnchor() + "'"
-                   + System.getProperty("line.separator") 
-                   + "got: " + System.getProperty("line.separator") 
-                   + "'" + o2[i].getToUrl() + "', anchor: '" + o2[i].getAnchor() + "'",
-                   false
-          );
-        
+        Assert.assertTrue("got wrong outlinks at position " + i
+            + System.getProperty("line.separator") 
+            + "answer: " + System.getProperty("line.separator") 
+            + "'" + o1[i].getToUrl() + "', anchor: '" + o1[i].getAnchor() + "'"
+            + System.getProperty("line.separator") 
+            + "got: " + System.getProperty("line.separator") 
+            + "'" + o2[i].getToUrl() + "', anchor: '" + o2[i].getAnchor() + "'",
+            false
+            );
+
       }
     }
   }

Modified: nutch/trunk/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestHtmlParser.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestHtmlParser.java?rev=1582928&r1=1582927&r2=1582928&view=diff
==============================================================================
--- nutch/trunk/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestHtmlParser.java (original)
+++ nutch/trunk/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestHtmlParser.java Sat Mar 29 00:54:40 2014
@@ -21,16 +21,17 @@ import java.nio.charset.Charset;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.nutch.metadata.Metadata;
+import org.apache.nutch.parse.html.HtmlParser;
 import org.apache.nutch.parse.Parse;
 import org.apache.nutch.parse.Parser;
 import org.apache.nutch.protocol.Content;
 import org.apache.nutch.util.NutchConfiguration;
+import org.junit.Assert;
+import org.junit.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import junit.framework.TestCase;
-
-public class TestHtmlParser extends TestCase {
+public class TestHtmlParser {
 
   public static final Logger LOG = LoggerFactory.getLogger(TestHtmlParser.class);
 
@@ -90,12 +91,11 @@ public class TestHtmlParser extends Test
           + encodingTestContent
     }
   };
-  
+
   private Configuration conf;
   private Parser parser;
-  
-  public TestHtmlParser(String name) { 
-    super(name);
+
+  public TestHtmlParser() { 
     conf = NutchConfiguration.create();
     parser = new HtmlParser();
     parser.setConf(conf);
@@ -107,7 +107,8 @@ public class TestHtmlParser extends Test
         new Content(dummyUrl, dummyUrl, contentBytes, "text/html", new Metadata(),
             conf)).get(dummyUrl);
   }
-  
+
+  @Test
   public void testEncodingDetection() {
     for (String[] testPage : encodingTestPages) {
       String name = testPage[0];
@@ -121,14 +122,14 @@ public class TestHtmlParser extends Test
       LOG.info("title:\t" + title);
       LOG.info("keywords:\t" + keywords);
       LOG.info("text:\t" + text);
-      assertEquals("Title not extracted properly (" + name + ")",
+      Assert.assertEquals("Title not extracted properly (" + name + ")",
           encodingTestKeywords, title);
       for (String keyword : encodingTestKeywords.split(",\\s*")) {
-        assertTrue(keyword + " not found in text (" + name + ")",
+        Assert.assertTrue(keyword + " not found in text (" + name + ")",
             text.contains(keyword));
       }
       if (keywords != null) {
-        assertEquals("Keywords not extracted properly (" + name + ")",
+        Assert.assertEquals("Keywords not extracted properly (" + name + ")",
             encodingTestKeywords, keywords);
       }
     }

Modified: nutch/trunk/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestRobotsMetaProcessor.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestRobotsMetaProcessor.java?rev=1582928&r1=1582927&r2=1582928&view=diff
==============================================================================
--- nutch/trunk/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestRobotsMetaProcessor.java (original)
+++ nutch/trunk/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestRobotsMetaProcessor.java Sat Mar 29 00:54:40 2014
@@ -17,23 +17,20 @@
 
 package org.apache.nutch.parse.html;
 
-import junit.framework.TestCase;
-
 import org.apache.nutch.parse.HTMLMetaTags;
 
 import java.io.ByteArrayInputStream;
 import java.net.URL;
 
 import org.cyberneko.html.parsers.*;
+import org.junit.Assert;
+import org.junit.Test;
 import org.xml.sax.*;
 import org.w3c.dom.*;
 import org.apache.html.dom.*;
 
 /** Unit tests for HTMLMetaProcessor. */
-public class TestRobotsMetaProcessor extends TestCase {
-  public TestRobotsMetaProcessor(String name) { 
-    super(name); 
-  }
+public class TestRobotsMetaProcessor {
 
   /*
 
@@ -46,71 +43,71 @@ public class TestRobotsMetaProcessor ext
 
   <META HTTP-EQUIV="Pragma" CONTENT="no-cache">
 
-  */
+   */
 
 
   public static String[] tests= 
-  {
-    "<html><head><title>test page</title>"
-    + "<META NAME=\"ROBOTS\" CONTENT=\"NONE\"> "
-    + "<META HTTP-EQUIV=\"PRAGMA\" CONTENT=\"NO-CACHE\"> "
-    + "</head><body>"
-    + " some text"
-    + "</body></html>",
-
-    "<html><head><title>test page</title>"
-    + "<meta name=\"robots\" content=\"all\"> "
-    + "<meta http-equiv=\"pragma\" content=\"no-cache\"> "
-    + "</head><body>"
-    + " some text"
-    + "</body></html>",
-
-    "<html><head><title>test page</title>"
-    + "<MeTa NaMe=\"RoBoTs\" CoNtEnT=\"nOnE\"> "
-    + "<MeTa HtTp-EqUiV=\"pRaGmA\" cOnTeNt=\"No-CaChE\"> "
-    + "</head><body>"
-    + " some text"
-    + "</body></html>",
-
-    "<html><head><title>test page</title>"
-    + "<meta name=\"robots\" content=\"none\"> "
-    + "</head><body>"
-    + " some text"
-    + "</body></html>",
-
+    {
     "<html><head><title>test page</title>"
-    + "<meta name=\"robots\" content=\"noindex,nofollow\"> "
-    + "</head><body>"
-    + " some text"
-    + "</body></html>",
+        + "<META NAME=\"ROBOTS\" CONTENT=\"NONE\"> "
+        + "<META HTTP-EQUIV=\"PRAGMA\" CONTENT=\"NO-CACHE\"> "
+        + "</head><body>"
+        + " some text"
+        + "</body></html>",
+
+        "<html><head><title>test page</title>"
+            + "<meta name=\"robots\" content=\"all\"> "
+            + "<meta http-equiv=\"pragma\" content=\"no-cache\"> "
+            + "</head><body>"
+            + " some text"
+            + "</body></html>",
+
+            "<html><head><title>test page</title>"
+                + "<MeTa NaMe=\"RoBoTs\" CoNtEnT=\"nOnE\"> "
+                + "<MeTa HtTp-EqUiV=\"pRaGmA\" cOnTeNt=\"No-CaChE\"> "
+                + "</head><body>"
+                + " some text"
+                + "</body></html>",
+
+                "<html><head><title>test page</title>"
+                    + "<meta name=\"robots\" content=\"none\"> "
+                    + "</head><body>"
+                    + " some text"
+                    + "</body></html>",
+
+                    "<html><head><title>test page</title>"
+                        + "<meta name=\"robots\" content=\"noindex,nofollow\"> "
+                        + "</head><body>"
+                        + " some text"
+                        + "</body></html>",
+
+                        "<html><head><title>test page</title>"
+                            + "<meta name=\"robots\" content=\"noindex,follow\"> "
+                            + "</head><body>"
+                            + " some text"
+                            + "</body></html>",
+
+                            "<html><head><title>test page</title>"
+                                + "<meta name=\"robots\" content=\"index,nofollow\"> "
+                                + "</head><body>"
+                                + " some text"
+                                + "</body></html>",
+
+                                "<html><head><title>test page</title>"
+                                    + "<meta name=\"robots\" content=\"index,follow\"> "
+                                    + "<base href=\"http://www.nutch.org/\">"
+                                    + "</head><body>"
+                                    + " some text"
+                                    + "</body></html>",
+
+                                    "<html><head><title>test page</title>"
+                                        + "<meta name=\"robots\"> "
+                                        + "<base href=\"http://www.nutch.org/base/\">"
+                                        + "</head><body>"
+                                        + " some text"
+                                        + "</body></html>",
 
-    "<html><head><title>test page</title>"
-    + "<meta name=\"robots\" content=\"noindex,follow\"> "
-    + "</head><body>"
-    + " some text"
-    + "</body></html>",
-
-    "<html><head><title>test page</title>"
-    + "<meta name=\"robots\" content=\"index,nofollow\"> "
-    + "</head><body>"
-    + " some text"
-    + "</body></html>",
-
-    "<html><head><title>test page</title>"
-    + "<meta name=\"robots\" content=\"index,follow\"> "
-    + "<base href=\"http://www.nutch.org/\">"
-    + "</head><body>"
-    + " some text"
-    + "</body></html>",
-
-    "<html><head><title>test page</title>"
-    + "<meta name=\"robots\"> "
-    + "<base href=\"http://www.nutch.org/base/\">"
-    + "</head><body>"
-    + " some text"
-    + "</body></html>",
-
-  };
+    };
 
   public static final boolean[][] answers= {
     {true, true, true},     // NONE
@@ -126,25 +123,26 @@ public class TestRobotsMetaProcessor ext
 
   private URL[][] currURLsAndAnswers;
 
+  @Test
   public void testRobotsMetaProcessor() {
     DOMFragmentParser parser= new DOMFragmentParser();;
 
     try { 
       currURLsAndAnswers= new URL[][] {
-        {new URL("http://www.nutch.org"), null},
-        {new URL("http://www.nutch.org"), null},
-        {new URL("http://www.nutch.org"), null},
-        {new URL("http://www.nutch.org"), null},
-        {new URL("http://www.nutch.org"), null},
-        {new URL("http://www.nutch.org"), null},
-        {new URL("http://www.nutch.org"), null},
-        {new URL("http://www.nutch.org/foo/"), 
-         new URL("http://www.nutch.org/")},
-        {new URL("http://www.nutch.org"), 
-         new URL("http://www.nutch.org/base/")}
+          {new URL("http://www.nutch.org"), null},
+          {new URL("http://www.nutch.org"), null},
+          {new URL("http://www.nutch.org"), null},
+          {new URL("http://www.nutch.org"), null},
+          {new URL("http://www.nutch.org"), null},
+          {new URL("http://www.nutch.org"), null},
+          {new URL("http://www.nutch.org"), null},
+          {new URL("http://www.nutch.org/foo/"), 
+            new URL("http://www.nutch.org/")},
+            {new URL("http://www.nutch.org"), 
+              new URL("http://www.nutch.org/base/")}
       };
     } catch (Exception e) {
-      assertTrue("couldn't make test URLs!", false);
+      Assert.assertTrue("couldn't make test URLs!", false);
     }
 
     for (int i= 0; i < tests.length; i++) {
@@ -160,22 +158,22 @@ public class TestRobotsMetaProcessor ext
 
       HTMLMetaTags robotsMeta= new HTMLMetaTags();
       HTMLMetaProcessor.getMetaTags(robotsMeta, node, 
-                                                  currURLsAndAnswers[i][0]);
+          currURLsAndAnswers[i][0]);
+
+      Assert.assertTrue("got index wrong on test " + i,
+          robotsMeta.getNoIndex() == answers[i][0]);
+      Assert.assertTrue("got follow wrong on test " + i,
+          robotsMeta.getNoFollow() == answers[i][1]);
+      Assert.assertTrue("got cache wrong on test " + i,
+          robotsMeta.getNoCache() == answers[i][2]);
+      Assert.assertTrue("got base href wrong on test " + i + " (got "
+          + robotsMeta.getBaseHref() + ")",
+          ( (robotsMeta.getBaseHref() == null)
+              && (currURLsAndAnswers[i][1] == null) )
+              || ( (robotsMeta.getBaseHref() != null)
+                  && robotsMeta.getBaseHref().equals(
+                      currURLsAndAnswers[i][1]) ) );
 
-      assertTrue("got index wrong on test " + i,
-                 robotsMeta.getNoIndex() == answers[i][0]);
-      assertTrue("got follow wrong on test " + i,
-                 robotsMeta.getNoFollow() == answers[i][1]);
-      assertTrue("got cache wrong on test " + i,
-                 robotsMeta.getNoCache() == answers[i][2]);
-      assertTrue("got base href wrong on test " + i + " (got "
-                 + robotsMeta.getBaseHref() + ")",
-                 ( (robotsMeta.getBaseHref() == null)
-                    && (currURLsAndAnswers[i][1] == null) )
-                 || ( (robotsMeta.getBaseHref() != null)
-                      && robotsMeta.getBaseHref().equals(
-                        currURLsAndAnswers[i][1]) ) );
-      
     }
   }
 

Modified: nutch/trunk/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/html/TestMetatagParser.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/html/TestMetatagParser.java?rev=1582928&r1=1582927&r2=1582928&view=diff
==============================================================================
--- nutch/trunk/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/html/TestMetatagParser.java (original)
+++ nutch/trunk/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/html/TestMetatagParser.java Sat Mar 29 00:54:40 2014
@@ -20,8 +20,6 @@ package org.apache.nutch.parse.html;
 import java.util.Set;
 import java.util.TreeSet;
 
-import junit.framework.TestCase;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.Text;
 import org.apache.nutch.crawl.CrawlDatum;
@@ -32,8 +30,10 @@ import org.apache.nutch.protocol.Content
 import org.apache.nutch.protocol.Protocol;
 import org.apache.nutch.protocol.ProtocolFactory;
 import org.apache.nutch.util.NutchConfiguration;
+import org.junit.Assert;
+import org.junit.Test;
 
-public class TestMetatagParser extends TestCase {
+public class TestMetatagParser {
   
   private String fileSeparator = System.getProperty("file.separator");
   private String sampleDir = System.getProperty("test.data", ".");
@@ -42,10 +42,6 @@ public class TestMetatagParser extends T
   private String description = "This is a test of description";
   private String keywords = "This is a test of keywords";
   
-  public TestMetatagParser(String name) {
-    super(name);
-  }
-  
   public Metadata parseMeta(String fileName, Configuration conf) {
     Metadata metadata = null;
     try {
@@ -57,21 +53,23 @@ public class TestMetatagParser extends T
       metadata = parse.getData().getParseMeta();
     } catch (Exception e) {
       e.printStackTrace();
-      fail(e.toString());
+      Assert.fail(e.toString());
     }
     return metadata;
   }
 
+  @Test
   public void testIt() {
     Configuration conf = NutchConfiguration.create();
     
     // check that we get the same values
     Metadata parseMeta= parseMeta(sampleFile, conf);
       
-    assertEquals(description, parseMeta.get("metatag.description"));
-    assertEquals(keywords, parseMeta.get("metatag.keywords"));
+    Assert.assertEquals(description, parseMeta.get("metatag.description"));
+    Assert.assertEquals(keywords, parseMeta.get("metatag.keywords"));
   }
 
+  @Test
   public void testMultiValueMetatags() {
     Configuration conf = NutchConfiguration.create();
     conf.set("metatags.names", "keywords;DC.creator");
@@ -87,7 +85,7 @@ public class TestMetatagParser extends T
     }
     String[] expectedValues1 = {"Doug Cutting", "Michael Cafarella"};
     for (String val : expectedValues1) {
-      assertTrue(failMessage + val, valueSet.contains(val));      
+      Assert.assertTrue(failMessage + val, valueSet.contains(val));      
     }
     
     valueSet.clear();
@@ -96,7 +94,7 @@ public class TestMetatagParser extends T
     }
     String[] expectedValues2 = {"robot d'indexation", "web crawler", "Webcrawler"};
     for (String val : expectedValues2) {
-      assertTrue(failMessage + val, valueSet.contains(val));      
+      Assert.assertTrue(failMessage + val, valueSet.contains(val));      
     }
   }