You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2014/06/20 04:13:27 UTC

svn commit: r1604072 - in /nutch/branches/2.x: ./ src/java/org/apache/nutch/crawl/ src/plugin/parse-html/src/test/org/apache/nutch/parse/html/ src/plugin/parse-metatags/src/test/org/apache/nutch/parse/ src/plugin/parse-tika/src/test/org/apache/nutch/pa...

Author: lewismc
Date: Fri Jun 20 02:13:26 2014
New Revision: 1604072

URL: http://svn.apache.org/r1604072
Log:
NUTCH-1796 Ensure Gora object builders are used as opposed to empty constructors

Modified:
    nutch/branches/2.x/CHANGES.txt
    nutch/branches/2.x/src/java/org/apache/nutch/crawl/DbUpdateReducer.java
    nutch/branches/2.x/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestHtmlParser.java
    nutch/branches/2.x/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/TestMetaTagsParser.java
    nutch/branches/2.x/src/plugin/parse-tika/src/test/org/apache/nutch/parse/tika/TestImageMetadata.java
    nutch/branches/2.x/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java
    nutch/branches/2.x/src/plugin/scoring-opic/src/test/org/apache/nutch/scoring/opic/TestOPICScoringFilter.java
    nutch/branches/2.x/src/plugin/tld/src/test/org/apache/nutch/indexer/tld/TestTLDIndexingFilter.java
    nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestAdaptiveFetchSchedule.java

Modified: nutch/branches/2.x/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/CHANGES.txt?rev=1604072&r1=1604071&r2=1604072&view=diff
==============================================================================
--- nutch/branches/2.x/CHANGES.txt (original)
+++ nutch/branches/2.x/CHANGES.txt Fri Jun 20 02:13:26 2014
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Current Development
 
+* NUTCH-1796 Ensure Gora object builders are used as oppose to empty constructors (snagel via lewismc)
+
 * NUTCH-1590 [SECURITY] Frame injection vulnerability in published Javadoc (jnioche)
 
 * NUTCH-1736 Can't fetch page if http response header contains Transfer-Encoding:chunked (ysc via jnioche)

Modified: nutch/branches/2.x/src/java/org/apache/nutch/crawl/DbUpdateReducer.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/java/org/apache/nutch/crawl/DbUpdateReducer.java?rev=1604072&r1=1604071&r2=1604072&view=diff
==============================================================================
--- nutch/branches/2.x/src/java/org/apache/nutch/crawl/DbUpdateReducer.java (original)
+++ nutch/branches/2.x/src/java/org/apache/nutch/crawl/DbUpdateReducer.java Fri Jun 20 02:13:26 2014
@@ -23,7 +23,6 @@ import java.util.List;
 
 import org.apache.avro.util.Utf8;
 import org.apache.gora.mapreduce.GoraReducer;
-import org.apache.gora.store.DataStore;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.util.StringUtils;

Modified: nutch/branches/2.x/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestHtmlParser.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestHtmlParser.java?rev=1604072&r1=1604071&r2=1604072&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestHtmlParser.java (original)
+++ nutch/branches/2.x/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestHtmlParser.java Fri Jun 20 02:13:26 2014
@@ -22,18 +22,16 @@ import java.nio.charset.Charset;
 
 import org.apache.avro.util.Utf8;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.nutch.metadata.Metadata;
 import org.apache.nutch.parse.Parse;
 import org.apache.nutch.parse.Parser;
-import org.apache.nutch.protocol.Content;
 import org.apache.nutch.storage.WebPage;
 import org.apache.nutch.util.Bytes;
 import org.apache.nutch.util.NutchConfiguration;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-
 import org.junit.Before;
 import org.junit.Test;
+
 import static org.junit.Assert.*;
 
 public class TestHtmlParser {
@@ -111,7 +109,7 @@ public class TestHtmlParser {
   }
 
   protected WebPage page(byte[] contentBytes) {
-    WebPage page = new WebPage();
+    WebPage page = WebPage.newBuilder().build();
     page.setBaseUrl(new Utf8(dummyUrl));
     page.setContent(ByteBuffer.wrap(contentBytes));
     page.setContentType(new Utf8("text/html"));

Modified: nutch/branches/2.x/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/TestMetaTagsParser.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/TestMetaTagsParser.java?rev=1604072&r1=1604071&r2=1604072&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/TestMetaTagsParser.java (original)
+++ nutch/branches/2.x/src/plugin/parse-metatags/src/test/org/apache/nutch/parse/TestMetaTagsParser.java Fri Jun 20 02:13:26 2014
@@ -76,7 +76,7 @@ public class TestMetaTagsParser {
       in.readFully(bytes);
       in.close();
 
-      WebPage page = new WebPage();
+      WebPage page = WebPage.newBuilder().build();
       page.setBaseUrl(new Utf8(urlString));
       page.setContent(ByteBuffer.wrap(bytes));
       page.setContentType(new Utf8("text/html"));

Modified: nutch/branches/2.x/src/plugin/parse-tika/src/test/org/apache/nutch/parse/tika/TestImageMetadata.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/parse-tika/src/test/org/apache/nutch/parse/tika/TestImageMetadata.java?rev=1604072&r1=1604071&r2=1604072&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/parse-tika/src/test/org/apache/nutch/parse/tika/TestImageMetadata.java (original)
+++ nutch/branches/2.x/src/plugin/parse-tika/src/test/org/apache/nutch/parse/tika/TestImageMetadata.java Fri Jun 20 02:13:26 2014
@@ -60,7 +60,7 @@ public class TestImageMetadata {
       in.readFully(bytes);
       in.close();
       
-      WebPage page = new WebPage();
+      WebPage page = WebPage.newBuilder().build();
       page.setBaseUrl(new Utf8(urlString));
       page.setContent(ByteBuffer.wrap(bytes));
       String mtype = mimeutil.getMimeType(file);

Modified: nutch/branches/2.x/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java?rev=1604072&r1=1604071&r2=1604072&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java (original)
+++ nutch/branches/2.x/src/plugin/protocol-http/src/test/org/apache/nutch/protocol/http/TestProtocolHttp.java Fri Jun 20 02:13:26 2014
@@ -119,8 +119,9 @@ public class TestProtocolHttp {
    */
   private void fetchPage(String page, int expectedCode) throws Exception {
     URL url = new URL("http", "127.0.0.1", port, page);
-    Response response = http.getResponse(url, new WebPage(), true);
-    ProtocolOutput out = http.getProtocolOutput(url.toString(), new WebPage());
+    WebPage p = WebPage.newBuilder().build();
+    Response response = http.getResponse(url, p, true);
+    ProtocolOutput out = http.getProtocolOutput(url.toString(), p);
     Content content = out.getContent();
     
     assertEquals("HTTP Status Code for " + url, expectedCode, response.getCode());

Modified: nutch/branches/2.x/src/plugin/scoring-opic/src/test/org/apache/nutch/scoring/opic/TestOPICScoringFilter.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/scoring-opic/src/test/org/apache/nutch/scoring/opic/TestOPICScoringFilter.java?rev=1604072&r1=1604071&r2=1604072&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/scoring-opic/src/test/org/apache/nutch/scoring/opic/TestOPICScoringFilter.java (original)
+++ nutch/branches/2.x/src/plugin/scoring-opic/src/test/org/apache/nutch/scoring/opic/TestOPICScoringFilter.java Fri Jun 20 02:13:26 2014
@@ -25,7 +25,6 @@ import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 
-import org.apache.avro.util.Utf8;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.nutch.scoring.ScoreDatum;
 import org.apache.nutch.storage.WebPage;
@@ -148,7 +147,7 @@ public class TestOPICScoringFilter {
 
     // injecting seed list, with scored attached to webpages
     for (String url : self.seedList) {
-      WebPage row = new WebPage();
+      WebPage row = WebPage.newBuilder().build();
       row.setScore(scoreInjected);
       scoringFilter.injectedScore(url, row);
 
@@ -191,15 +190,15 @@ public class TestOPICScoringFilter {
         // getting outlinks from testdata
         String[] seedOutlinks = self.linkList.get(url);
         for (String seedOutlink : seedOutlinks) {
-          row.putToOutlinks(new Utf8(seedOutlink), new Utf8());
+          row.getOutlinks().put(seedOutlink, "");
         }
 
         self.outlinkedScoreData.clear();
 
         // Existing outlinks are added to outlinkedScoreData
-        Map<Utf8, Utf8> outlinks = row.getOutlinks();
+        Map<CharSequence, CharSequence> outlinks = row.getOutlinks();
         if (outlinks != null) {
-          for (Entry<Utf8, Utf8> e : outlinks.entrySet()) {
+          for (Entry<CharSequence, CharSequence> e : outlinks.entrySet()) {
             int depth = Integer.MAX_VALUE;
             self.outlinkedScoreData.add(new ScoreDatum(0.0f, e.getKey()
                 .toString(), e.getValue().toString(), depth));
@@ -213,7 +212,7 @@ public class TestOPICScoringFilter {
           if (dbWebPages.get(TableUtil.reverseUrl(sc.getUrl())) == null) {
             // Check each outlink and creates new webpages if it's not
             // exist in database (dbWebPages)
-            WebPage outlinkRow = new WebPage();
+            WebPage outlinkRow = WebPage.newBuilder().build();
             scoringFilter.initialScore(sc.getUrl(), outlinkRow);
             List<ScoreDatum> newScoreList = new LinkedList<ScoreDatum>();
             newScoreList.add(sc);

Modified: nutch/branches/2.x/src/plugin/tld/src/test/org/apache/nutch/indexer/tld/TestTLDIndexingFilter.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/tld/src/test/org/apache/nutch/indexer/tld/TestTLDIndexingFilter.java?rev=1604072&r1=1604071&r2=1604072&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/tld/src/test/org/apache/nutch/indexer/tld/TestTLDIndexingFilter.java (original)
+++ nutch/branches/2.x/src/plugin/tld/src/test/org/apache/nutch/indexer/tld/TestTLDIndexingFilter.java Fri Jun 20 02:13:26 2014
@@ -92,7 +92,7 @@ public class TestTLDIndexingFilter {
     urls.put("ftp://w.info.nf/", "info.nf");
     urls.put("file://x.aa.no", "aa.no");
 
-    WebPage page = new WebPage();
+    WebPage page = WebPage.newBuilder().build();
 
     TLDIndexingFilter filter = new TLDIndexingFilter();
     assertNotNull(filter);

Modified: nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestAdaptiveFetchSchedule.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestAdaptiveFetchSchedule.java?rev=1604072&r1=1604071&r2=1604072&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestAdaptiveFetchSchedule.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestAdaptiveFetchSchedule.java Fri Jun 20 02:13:26 2014
@@ -83,7 +83,7 @@ public class TestAdaptiveFetchSchedule e
    * @return wp :Webpage
    */
   public WebPage prepareWebpage() {
-    WebPage wp = new WebPage();
+    WebPage wp = WebPage.newBuilder().build();
     wp.setStatus(1);
     wp.setFetchInterval(interval);
     wp.setScore(1.0f);