You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2014/06/20 04:13:27 UTC
svn commit: r1604072 - in /nutch/branches/2.x: ./
src/java/org/apache/nutch/crawl/
src/plugin/parse-html/src/test/org/apache/nutch/parse/html/
src/plugin/parse-metatags/src/test/org/apache/nutch/parse/
src/plugin/parse-tika/src/test/org/apache/nutch/pa...
Author: lewismc
Date: Fri Jun 20 02:13:26 2014
New Revision: 1604072
URL: http://svn.apache.org/r1604072
Log:
NUTCH-1796 Ensure Gora object builders are used as opposed to empty constructors
Modified:
nutch/branches/2.x/CHANGES.txt
nutch/branches/2.x/src/java/org/apache/nutch/crawl/DbUpdateReducer.java
nutch/branches/2.x/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestHtmlParser.java
nutch/branches/2.x/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/TestMetaTagsParser.java
nutch/branches/2.x/src/plugin/parse-tika/src/test/org/apache/nutch/parse/tika/TestImageMetadata.java
nutch/branches/2.x/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java
nutch/branches/2.x/src/plugin/scoring-opic/src/test/org/apache/nutch/scoring/opic/TestOPICScoringFilter.java
nutch/branches/2.x/src/plugin/tld/src/test/org/apache/nutch/indexer/tld/TestTLDIndexingFilter.java
nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestAdaptiveFetchSchedule.java
Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1604072&r1=1604071&r2=1604072&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Fri Jun 20 02:13:26 2014
@@ -2,6 +2,8 @@ Nutch Change Log
Current Development
+* NUTCH-1796 Ensure Gora object builders are used as oppose to empty constructors (snagel via lewismc)
+
* NUTCH-1590 [SECURITY] Frame injection vulnerability in published Javadoc (jnioche)
* NUTCH-1736 Can't fetch page if http response header contains Transfer-Encoding：chunked (ysc via jnioche)
Modified: nutch/branches/2.x/src/java/org/apache/nutch/crawl/DbUpdateReducer.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/crawl/DbUpdateReducer.java?rev=1604072&r1=1604071&r2=1604072&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/crawl/DbUpdateReducer.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/crawl/DbUpdateReducer.java Fri Jun 20 02:13:26 2014
@@ -23,7 +23,6 @@ import java.util.List;
import org.apache.avro.util.Utf8;
import org.apache.gora.mapreduce.GoraReducer;
-import org.apache.gora.store.DataStore;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.StringUtils;
Modified: nutch/branches/2.x/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestHtmlParser.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestHtmlParser.java?rev=1604072&r1=1604071&r2=1604072&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestHtmlParser.java (original)
+++ nutch/branches/2.x/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestHtmlParser.java Fri Jun 20 02:13:26 2014
@@ -22,18 +22,16 @@ import java.nio.charset.Charset;
import org.apache.avro.util.Utf8;
import org.apache.hadoop.conf.Configuration;
-import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.parse.Parse;
import org.apache.nutch.parse.Parser;
-import org.apache.nutch.protocol.Content;
import org.apache.nutch.storage.WebPage;
import org.apache.nutch.util.Bytes;
import org.apache.nutch.util.NutchConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-
import org.junit.Before;
import org.junit.Test;
+
import static org.junit.Assert.*;
public class TestHtmlParser {
@@ -111,7 +109,7 @@ public class TestHtmlParser {
}
protected WebPage page(byte[] contentBytes) {
- WebPage page = new WebPage();
+ WebPage page = WebPage.newBuilder().build();
page.setBaseUrl(new Utf8(dummyUrl));
page.setContent(ByteBuffer.wrap(contentBytes));
page.setContentType(new Utf8("text/html"));
Modified: nutch/branches/2.x/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/TestMetaTagsParser.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/TestMetaTagsParser.java?rev=1604072&r1=1604071&r2=1604072&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/TestMetaTagsParser.java (original)
+++ nutch/branches/2.x/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/TestMetaTagsParser.java Fri Jun 20 02:13:26 2014
@@ -76,7 +76,7 @@ public class TestMetaTagsParser {
in.readFully(bytes);
in.close();
- WebPage page = new WebPage();
+ WebPage page = WebPage.newBuilder().build();
page.setBaseUrl(new Utf8(urlString));
page.setContent(ByteBuffer.wrap(bytes));
page.setContentType(new Utf8("text/html"));
Modified: nutch/branches/2.x/src/plugin/parse-tika/src/test/org/apache/nutch/parse/tika/TestImageMetadata.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/parse-tika/src/test/org/apache/nutch/parse/tika/TestImageMetadata.java?rev=1604072&r1=1604071&r2=1604072&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/parse-tika/src/test/org/apache/nutch/parse/tika/TestImageMetadata.java (original)
+++ nutch/branches/2.x/src/plugin/parse-tika/src/test/org/apache/nutch/parse/tika/TestImageMetadata.java Fri Jun 20 02:13:26 2014
@@ -60,7 +60,7 @@ public class TestImageMetadata {
in.readFully(bytes);
in.close();
- WebPage page = new WebPage();
+ WebPage page = WebPage.newBuilder().build();
page.setBaseUrl(new Utf8(urlString));
page.setContent(ByteBuffer.wrap(bytes));
String mtype = mimeutil.getMimeType(file);
Modified: nutch/branches/2.x/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java?rev=1604072&r1=1604071&r2=1604072&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java Fri Jun 20 02:13:26 2014
@@ -119,8 +119,9 @@ public class TestProtocolHttp {
*/
private void fetchPage(String page, int expectedCode) throws Exception {
URL url = new URL("http", "127.0.0.1", port, page);
- Response response = http.getResponse(url, new WebPage(), true);
- ProtocolOutput out = http.getProtocolOutput(url.toString(), new WebPage());
+ WebPage p = WebPage.newBuilder().build();
+ Response response = http.getResponse(url, p, true);
+ ProtocolOutput out = http.getProtocolOutput(url.toString(), p);
Content content = out.getContent();
assertEquals("HTTP Status Code for " + url, expectedCode, response.getCode());
Modified: nutch/branches/2.x/src/plugin/scoring-opic/src/test/org/apache/nutch/scoring/opic/TestOPICScoringFilter.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/scoring-opic/src/test/org/apache/nutch/scoring/opic/TestOPICScoringFilter.java?rev=1604072&r1=1604071&r2=1604072&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/scoring-opic/src/test/org/apache/nutch/scoring/opic/TestOPICScoringFilter.java (original)
+++ nutch/branches/2.x/src/plugin/scoring-opic/src/test/org/apache/nutch/scoring/opic/TestOPICScoringFilter.java Fri Jun 20 02:13:26 2014
@@ -25,7 +25,6 @@ import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
-import org.apache.avro.util.Utf8;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.scoring.ScoreDatum;
import org.apache.nutch.storage.WebPage;
@@ -148,7 +147,7 @@ public class TestOPICScoringFilter {
// injecting seed list, with scored attached to webpages
for (String url : self.seedList) {
- WebPage row = new WebPage();
+ WebPage row = WebPage.newBuilder().build();
row.setScore(scoreInjected);
scoringFilter.injectedScore(url, row);
@@ -191,15 +190,15 @@ public class TestOPICScoringFilter {
// getting outlinks from testdata
String[] seedOutlinks = self.linkList.get(url);
for (String seedOutlink : seedOutlinks) {
- row.putToOutlinks(new Utf8(seedOutlink), new Utf8());
+ row.getOutlinks().put(seedOutlink, "");
}
self.outlinkedScoreData.clear();
// Existing outlinks are added to outlinkedScoreData
- Map<Utf8, Utf8> outlinks = row.getOutlinks();
+ Map<CharSequence, CharSequence> outlinks = row.getOutlinks();
if (outlinks != null) {
- for (Entry<Utf8, Utf8> e : outlinks.entrySet()) {
+ for (Entry<CharSequence, CharSequence> e : outlinks.entrySet()) {
int depth = Integer.MAX_VALUE;
self.outlinkedScoreData.add(new ScoreDatum(0.0f, e.getKey()
.toString(), e.getValue().toString(), depth));
@@ -213,7 +212,7 @@ public class TestOPICScoringFilter {
if (dbWebPages.get(TableUtil.reverseUrl(sc.getUrl())) == null) {
// Check each outlink and creates new webpages if it's not
// exist in database (dbWebPages)
- WebPage outlinkRow = new WebPage();
+ WebPage outlinkRow = WebPage.newBuilder().build();
scoringFilter.initialScore(sc.getUrl(), outlinkRow);
List<ScoreDatum> newScoreList = new LinkedList<ScoreDatum>();
newScoreList.add(sc);
Modified: nutch/branches/2.x/src/plugin/tld/src/test/org/apache/nutch/indexer/tld/TestTLDIndexingFilter.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/tld/src/test/org/apache/nutch/indexer/tld/TestTLDIndexingFilter.java?rev=1604072&r1=1604071&r2=1604072&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/tld/src/test/org/apache/nutch/indexer/tld/TestTLDIndexingFilter.java (original)
+++ nutch/branches/2.x/src/plugin/tld/src/test/org/apache/nutch/indexer/tld/TestTLDIndexingFilter.java Fri Jun 20 02:13:26 2014
@@ -92,7 +92,7 @@ public class TestTLDIndexingFilter {
urls.put("ftp://w.info.nf/", "info.nf");
urls.put("file://x.aa.no", "aa.no");
- WebPage page = new WebPage();
+ WebPage page = WebPage.newBuilder().build();
TLDIndexingFilter filter = new TLDIndexingFilter();
assertNotNull(filter);
Modified: nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestAdaptiveFetchSchedule.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestAdaptiveFetchSchedule.java?rev=1604072&r1=1604071&r2=1604072&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestAdaptiveFetchSchedule.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestAdaptiveFetchSchedule.java Fri Jun 20 02:13:26 2014
@@ -83,7 +83,7 @@ public class TestAdaptiveFetchSchedule e
* @return wp :Webpage
*/
public WebPage prepareWebpage() {
- WebPage wp = new WebPage();
+ WebPage wp = WebPage.newBuilder().build();
wp.setStatus(1);
wp.setFetchInterval(interval);
wp.setScore(1.0f);