You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by pk...@apache.org on 2006/01/02 23:07:57 UTC
svn commit: r365448 - in /lucene/nutch/trunk/src:
plugin/parse-ext/src/test/org/apache/nutch/parse/ext/
plugin/parse-mspowerpoint/src/test/org/apache/nutch/parse/mspowerpoint/
plugin/parse-msword/src/test/org/apache/nutch/parse/msword/
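plugin/parse-pdf/src/test/org/apache/nutch/parse/pdf/
plugin/parse-rss/src/test/org/apache/nutch/parse/rss/
plugin/parse-zip/src/test/org/apache/nutch/parse/zip/
test/org/apache/nutch/db/
test/org/apache/nutch/fetcher/
test/org/apache/nutch/pagedb/
test/org/apache/nutch/parse/
test/org/apache/nutch/tools/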
Author: pkosiorowski
Date: Mon Jan 2 14:07:34 2006
New Revision: 365448
URL: http://svn.apache.org/viewcvs?rev=365448&view=rev
Log:
Fixed JUnit tests after trunk cleanup.
Removed:
lucene/nutch/trunk/src/test/org/apache/nutch/db/DBTester.java
lucene/nutch/trunk/src/test/org/apache/nutch/db/TestWebDB.java
lucene/nutch/trunk/src/test/org/apache/nutch/fetcher/TestFetcher.java
lucene/nutch/trunk/src/test/org/apache/nutch/fetcher/TestFetcherOutput.java
lucene/nutch/trunk/src/test/org/apache/nutch/pagedb/TestFetchListEntry.java
lucene/nutch/trunk/src/test/org/apache/nutch/pagedb/TestPage.java
lucene/nutch/trunk/src/test/org/apache/nutch/tools/TestSegmentMergeTool.java
Modified:
lucene/nutch/trunk/src/plugin/parse-ext/src/test/org/apache/nutch/parse/ext/TestExtParser.java
lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/test/org/apache/nutch/parse/mspowerpoint/TestMSPowerPointParser.java
lucene/nutch/trunk/src/plugin/parse-msword/src/test/org/apache/nutch/parse/msword/TestMSWordParser.java
lucene/nutch/trunk/src/plugin/parse-pdf/src/test/org/apache/nutch/parse/pdf/TestPdfParser.java
lucene/nutch/trunk/src/plugin/parse-rss/src/test/org/apache/nutch/parse/rss/TestRSSParser.java
lucene/nutch/trunk/src/plugin/parse-zip/src/test/org/apache/nutch/parse/zip/TestZipParser.java
lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParseText.java
Modified: lucene/nutch/trunk/src/plugin/parse-ext/src/test/org/apache/nutch/parse/ext/TestExtParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-ext/src/test/org/apache/nutch/parse/ext/TestExtParser.java?rev=365448&r1=365447&r2=365448&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-ext/src/test/org/apache/nutch/parse/ext/TestExtParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-ext/src/test/org/apache/nutch/parse/ext/TestExtParser.java Mon Jan 2 14:07:34 2006
@@ -25,6 +25,9 @@
import org.apache.nutch.parse.ParseUtil;
import org.apache.nutch.parse.ParseException;
+import org.apache.nutch.io.UTF8;
+import org.apache.nutch.crawl.CrawlDatum;
+
import junit.framework.TestCase;
import java.io.File;
@@ -77,7 +80,7 @@
// get nutch content
Protocol protocol = ProtocolFactory.getProtocol(urlString);
- content = protocol.getProtocolOutput(urlString).getContent();
+ content = protocol.getProtocolOutput(new UTF8(urlString), new CrawlDatum()).getContent();
protocol = null;
}
Modified: lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/test/org/apache/nutch/parse/mspowerpoint/TestMSPowerPointParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/test/org/apache/nutch/parse/mspowerpoint/TestMSPowerPointParser.java?rev=365448&r1=365447&r2=365448&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/test/org/apache/nutch/parse/mspowerpoint/TestMSPowerPointParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-mspowerpoint/src/test/org/apache/nutch/parse/mspowerpoint/TestMSPowerPointParser.java Mon Jan 2 14:07:34 2006
@@ -35,6 +35,9 @@
import org.apache.nutch.protocol.ProtocolFactory;
import org.apache.nutch.util.LogFormatter;
+import org.apache.nutch.io.UTF8;
+import org.apache.nutch.crawl.CrawlDatum;
+
/**
* <p>
* Unit tests for MSPowerPointParser.
@@ -103,7 +106,7 @@
System.out.println("Testing file: " + this.urlString + "...");
this.protocol = ProtocolFactory.getProtocol(this.urlString);
- this.content = this.protocol.getProtocolOutput(this.urlString).getContent();
+ this.content = this.protocol.getProtocolOutput(new UTF8(this.urlString), new CrawlDatum()).getContent();
}
/**
Modified: lucene/nutch/trunk/src/plugin/parse-msword/src/test/org/apache/nutch/parse/msword/TestMSWordParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-msword/src/test/org/apache/nutch/parse/msword/TestMSWordParser.java?rev=365448&r1=365447&r2=365448&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-msword/src/test/org/apache/nutch/parse/msword/TestMSWordParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-msword/src/test/org/apache/nutch/parse/msword/TestMSWordParser.java Mon Jan 2 14:07:34 2006
@@ -25,6 +25,9 @@
import org.apache.nutch.parse.ParseUtil;
import org.apache.nutch.parse.ParseException;
+import org.apache.nutch.io.UTF8;
+import org.apache.nutch.crawl.CrawlDatum;
+
import junit.framework.TestCase;
/**
@@ -62,7 +65,7 @@
urlString = "file:" + sampleDir + fileSeparator + sampleFiles[i];
protocol = ProtocolFactory.getProtocol(urlString);
- content = protocol.getProtocolOutput(urlString).getContent();
+ content = protocol.getProtocolOutput(new UTF8(urlString), new CrawlDatum()).getContent();
parse = ParseUtil.parseByParserId("parse-msword",content);
assertTrue(parse.getText().startsWith(expectedText));
Modified: lucene/nutch/trunk/src/plugin/parse-pdf/src/test/org/apache/nutch/parse/pdf/TestPdfParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-pdf/src/test/org/apache/nutch/parse/pdf/TestPdfParser.java?rev=365448&r1=365447&r2=365448&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-pdf/src/test/org/apache/nutch/parse/pdf/TestPdfParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-pdf/src/test/org/apache/nutch/parse/pdf/TestPdfParser.java Mon Jan 2 14:07:34 2006
@@ -25,6 +25,9 @@
import org.apache.nutch.parse.ParseUtil;
import org.apache.nutch.parse.ParseException;
+import org.apache.nutch.io.UTF8;
+import org.apache.nutch.crawl.CrawlDatum;
+
import junit.framework.TestCase;
/**
@@ -62,7 +65,7 @@
urlString = "file:" + sampleDir + fileSeparator + sampleFiles[i];
protocol = ProtocolFactory.getProtocol(urlString);
- content = protocol.getProtocolOutput(urlString).getContent();
+ content = protocol.getProtocolOutput(new UTF8(urlString), new CrawlDatum()).getContent();
parse = ParseUtil.parseByParserId("parse-pdf",content);
int index = parse.getText().indexOf(expectedText);
Modified: lucene/nutch/trunk/src/plugin/parse-rss/src/test/org/apache/nutch/parse/rss/TestRSSParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-rss/src/test/org/apache/nutch/parse/rss/TestRSSParser.java?rev=365448&r1=365447&r2=365448&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-rss/src/test/org/apache/nutch/parse/rss/TestRSSParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-rss/src/test/org/apache/nutch/parse/rss/TestRSSParser.java Mon Jan 2 14:07:34 2006
@@ -27,6 +27,9 @@
import org.apache.nutch.parse.ParseData;
import org.apache.nutch.parse.Outlink;
+import org.apache.nutch.io.UTF8;
+import org.apache.nutch.crawl.CrawlDatum;
+
import junit.framework.TestCase;
/**
@@ -80,7 +83,7 @@
urlString = "file:" + sampleDir + fileSeparator + sampleFiles[i];
protocol = ProtocolFactory.getProtocol(urlString);
- content = protocol.getProtocolOutput(urlString).getContent();
+ content = protocol.getProtocolOutput(new UTF8(urlString), new CrawlDatum()).getContent();
parse = ParseUtil.parseByParserId("parse-rss",content);
//check that there are 3 outlinks:
Modified: lucene/nutch/trunk/src/plugin/parse-zip/src/test/org/apache/nutch/parse/zip/TestZipParser.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/plugin/parse-zip/src/test/org/apache/nutch/parse/zip/TestZipParser.java?rev=365448&r1=365447&r2=365448&view=diff
==============================================================================
--- lucene/nutch/trunk/src/plugin/parse-zip/src/test/org/apache/nutch/parse/zip/TestZipParser.java (original)
+++ lucene/nutch/trunk/src/plugin/parse-zip/src/test/org/apache/nutch/parse/zip/TestZipParser.java Mon Jan 2 14:07:34 2006
@@ -25,6 +25,9 @@
import org.apache.nutch.parse.ParseUtil;
import org.apache.nutch.parse.ParseException;
+import org.apache.nutch.io.UTF8;
+import org.apache.nutch.crawl.CrawlDatum;
+
import junit.framework.TestCase;
/**
@@ -62,7 +65,7 @@
urlString = "file:" + sampleDir + fileSeparator + sampleFiles[i];
protocol = ProtocolFactory.getProtocol(urlString);
- content = protocol.getProtocolOutput(urlString).getContent();
+ content = protocol.getProtocolOutput(new UTF8(urlString), new CrawlDatum()).getContent();
parse = ParseUtil.parseByParserId("parse-zip",content);
assertTrue(parse.getText().equals(expectedText));
}
Modified: lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParseText.java
URL: http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParseText.java?rev=365448&r1=365447&r2=365448&view=diff
==============================================================================
--- lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParseText.java (original)
+++ lucene/nutch/trunk/src/test/org/apache/nutch/parse/TestParseText.java Mon Jan 2 14:07:34 2006
@@ -18,7 +18,6 @@
import java.io.*;
import org.apache.nutch.io.*;
-import org.apache.nutch.pagedb.*;
import junit.framework.TestCase;
/** Unit tests for ParseText. */