You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by jn...@apache.org on 2014/05/15 10:10:09 UTC
svn commit: r1594812 [5/5] - in /nutch/branches/2.x: ./ ivy/ src/gora/
src/java/org/apache/nutch/api/ src/java/org/apache/nutch/crawl/
src/java/org/apache/nutch/fetcher/ src/java/org/apache/nutch/host/
src/java/org/apache/nutch/indexer/ src/java/org/ap...
Modified: nutch/branches/2.x/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/OPICScoringFilter.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/OPICScoringFilter.java?rev=1594812&r1=1594811&r2=1594812&view=diff
==============================================================================
--- nutch/branches/2.x/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/OPICScoringFilter.java (original)
+++ nutch/branches/2.x/src/plugin/scoring-opic/src/java/org/apache/nutch/scoring/opic/OPICScoringFilter.java Thu May 15 08:10:07 2014
@@ -17,17 +17,7 @@
package org.apache.nutch.scoring.opic;
-import java.net.MalformedURLException;
-import java.net.URL;
-import java.nio.ByteBuffer;
-import java.util.Collection;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
import org.apache.avro.util.Utf8;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.indexer.NutchDocument;
import org.apache.nutch.scoring.ScoreDatum;
@@ -35,6 +25,16 @@ import org.apache.nutch.scoring.ScoringF
import org.apache.nutch.scoring.ScoringFilterException;
import org.apache.nutch.storage.WebPage;
import org.apache.nutch.util.Bytes;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.nio.ByteBuffer;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
/**
* This plugin implements a variant of an Online Page Importance Computation
@@ -82,7 +82,7 @@ public class OPICScoringFilter implement
public void injectedScore(String url, WebPage row)
throws ScoringFilterException {
float score = row.getScore();
- row.putToMetadata(CASH_KEY, ByteBuffer.wrap(Bytes.toBytes(score)));
+ row.getMetadata().put(CASH_KEY, ByteBuffer.wrap(Bytes.toBytes(score)));
}
/** Set to 0.0f (unknown value) - inlink contributions will bring it to
@@ -90,7 +90,7 @@ public class OPICScoringFilter implement
@Override
public void initialScore(String url, WebPage row) throws ScoringFilterException {
row.setScore(0.0f);
- row.putToMetadata(CASH_KEY, ByteBuffer.wrap(Bytes.toBytes(0.0f)));
+ row.getMetadata().put(CASH_KEY, ByteBuffer.wrap(Bytes.toBytes(0.0f)));
}
/** Use {@link WebPage#getScore()}. */
@@ -108,12 +108,12 @@ public class OPICScoringFilter implement
}
float oldScore = row.getScore();
row.setScore(oldScore + adjust);
- ByteBuffer cashRaw = row.getFromMetadata(CASH_KEY);
+ ByteBuffer cashRaw = row.getMetadata().get(CASH_KEY);
float cash = 0.0f;
if (cashRaw != null) {
cash = Bytes.toFloat(cashRaw.array(), cashRaw.arrayOffset() + cashRaw.position());
}
- row.putToMetadata(CASH_KEY, ByteBuffer.wrap(Bytes.toBytes(cash + adjust)));
+ row.getMetadata().put(CASH_KEY, ByteBuffer.wrap(Bytes.toBytes(cash + adjust)));
}
/** Get cash on hand, divide it by the number of outlinks and apply. */
@@ -121,7 +121,7 @@ public class OPICScoringFilter implement
public void distributeScoreToOutlinks(String fromUrl,
WebPage row, Collection<ScoreDatum> scoreData,
int allCount) {
- ByteBuffer cashRaw = row.getFromMetadata(CASH_KEY);
+ ByteBuffer cashRaw = row.getMetadata().get(CASH_KEY);
if (cashRaw == null) {
return;
}
@@ -149,7 +149,7 @@ public class OPICScoringFilter implement
}
}
// reset cash to zero
- row.putToMetadata(CASH_KEY, ByteBuffer.wrap(Bytes.toBytes(0.0f)));
+ row.getMetadata().put(CASH_KEY, ByteBuffer.wrap(Bytes.toBytes(0.0f)));
}
/** Dampen the boost value by scorePower.*/
Modified: nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestAdaptiveFetchSchedule.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestAdaptiveFetchSchedule.java?rev=1594812&r1=1594811&r2=1594812&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestAdaptiveFetchSchedule.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestAdaptiveFetchSchedule.java Thu May 15 08:10:07 2014
@@ -87,7 +87,7 @@ public class TestAdaptiveFetchSchedule e
wp.setStatus(1);
wp.setFetchInterval(interval);
wp.setScore(1.0f);
- wp.setFetchTime(0);
+ wp.setFetchTime(0L);
return wp;
}
Modified: nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestGenerator.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestGenerator.java?rev=1594812&r1=1594811&r2=1594812&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestGenerator.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestGenerator.java Thu May 15 08:10:07 2014
@@ -16,13 +16,6 @@
*/
package org.apache.nutch.crawl;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.storage.Mark;
import org.apache.nutch.storage.WebPage;
@@ -33,7 +26,15 @@ import org.junit.After;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
-import static org.junit.Assert.*;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+
+import static org.junit.Assert.assertEquals;
/**
* Basic generator test. 1. Insert entries in webtable 2. Generates entries to
@@ -294,10 +295,10 @@ public class TestGenerator extends Abstr
*/
private URLWebPage createURLWebPage(final String url,
final int fetchInterval, final float score) {
- WebPage page = new WebPage();
+ WebPage page = WebPage.newBuilder().build();
page.setFetchInterval(fetchInterval);
page.setScore(score);
- page.setStatus(CrawlStatus.STATUS_UNFETCHED);
+ page.setStatus((int)CrawlStatus.STATUS_UNFETCHED);
return new URLWebPage(url, page);
}
Modified: nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestInjector.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestInjector.java?rev=1594812&r1=1594811&r2=1594812&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestInjector.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestInjector.java Thu May 15 08:10:07 2014
@@ -16,11 +16,6 @@
*/
package org.apache.nutch.crawl;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-
import org.apache.avro.util.Utf8;
import org.apache.hadoop.fs.Path;
import org.apache.nutch.storage.WebPage;
@@ -30,7 +25,14 @@ import org.apache.nutch.util.CrawlTestUt
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;
-import static org.junit.Assert.*;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
/**
* Basic injector test: 1. Creates a text file with urls 2. Injects them into
@@ -110,8 +112,8 @@ public class TestInjector extends Abstra
for (URLWebPage up : pages) {
WebPage page = up.getDatum();
String representation = up.getUrl();
- representation += "\tnutch.score=" + (int)page.getScore();
- ByteBuffer bb = page.getFromMetadata(new Utf8("custom.attribute"));
+ representation += "\tnutch.score=" + page.getScore().intValue();
+ ByteBuffer bb = page.getMetadata().get(new Utf8("custom.attribute"));
if (bb != null) {
representation += "\tcustom.attribute=" + Bytes.toString(bb);
}
Modified: nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestURLPartitioner.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestURLPartitioner.java?rev=1594812&r1=1594811&r2=1594812&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestURLPartitioner.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/crawl/TestURLPartitioner.java Thu May 15 08:10:07 2014
@@ -16,11 +16,6 @@
******************************************************************************/
package org.apache.nutch.crawl;
-import java.net.MalformedURLException;
-
-import org.junit.Test;
-import static org.junit.Assert.*;
-
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IntWritable;
import org.apache.nutch.crawl.GeneratorJob.SelectorEntry;
@@ -30,6 +25,12 @@ import org.apache.nutch.fetcher.FetchEnt
import org.apache.nutch.storage.WebPage;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.TableUtil;
+import org.junit.Test;
+
+import java.net.MalformedURLException;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotSame;
/**
* Tests {@link URLPartitioner}
@@ -168,7 +169,7 @@ public class TestURLPartitioner {
int partitionFromRef = refPartitioner.getPartition("http://www.example.org/", numReduceTasks);
//init selector entry (score shouldn't matter)
SelectorEntry selectorEntry = new SelectorEntry("http://www.example.org/", 1337);
- WebPage page = new WebPage();
+ WebPage page = WebPage.newBuilder().build();
int partitionFromSig = sigPartitioner.getPartition(selectorEntry, page, numReduceTasks);
assertEquals("partitions should be same",
@@ -199,7 +200,7 @@ public class TestURLPartitioner {
int partitionFromRef = refPartitioner.getPartition("http://www.example.org/", numReduceTasks);
IntWritable intWritable = new IntWritable(1337); //doesn't matter
- WebPage page = new WebPage();
+ WebPage page = WebPage.newBuilder().build();
String key = TableUtil.reverseUrl("http://www.example.org/");
FetchEntry fetchEntry = new FetchEntry(conf, key, page);
int partitionFromSig = sigPartitioner.getPartition(intWritable, fetchEntry, numReduceTasks);
Modified: nutch/branches/2.x/src/test/org/apache/nutch/indexer/TestIndexingFilters.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/indexer/TestIndexingFilters.java?rev=1594812&r1=1594811&r2=1594812&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/indexer/TestIndexingFilters.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/indexer/TestIndexingFilters.java Thu May 15 08:10:07 2014
@@ -16,14 +16,15 @@
*/
package org.apache.nutch.indexer;
-import org.junit.Test;
-import static org.junit.Assert.*;
-
import org.apache.avro.util.Utf8;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.storage.WebPage;
import org.apache.nutch.util.NutchConfiguration;
+import org.junit.Test;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
public class TestIndexingFilters {
@@ -42,7 +43,7 @@ public class TestIndexingFilters {
conf.set(IndexingFilters.INDEXINGFILTER_ORDER, class1 + " " + class2);
IndexingFilters filters = new IndexingFilters(conf);
- WebPage page = new WebPage();
+ WebPage page = WebPage.newBuilder().build();
page.setText(new Utf8("text"));
page.setTitle(new Utf8("title"));
filters.filter(new NutchDocument(),"http://www.example.com/",page);
@@ -59,7 +60,7 @@ public class TestIndexingFilters {
conf.addResource("crawl-tests.xml");
IndexingFilters filters = new IndexingFilters(conf);
- WebPage page = new WebPage();
+ WebPage page = WebPage.newBuilder().build();
page.setText(new Utf8("text"));
page.setTitle(new Utf8("title"));
NutchDocument doc = filters.filter(null,"http://www.example.com/",page);
@@ -82,7 +83,7 @@ public class TestIndexingFilters {
conf.set(IndexingFilters.INDEXINGFILTER_ORDER, class1);
IndexingFilters filters1 = new IndexingFilters(conf);
- WebPage page = new WebPage();
+ WebPage page = WebPage.newBuilder().build();
page.setText(new Utf8("text"));
page.setTitle(new Utf8("title"));
NutchDocument fdoc1 = filters1.filter(new NutchDocument(),"http://www.example.com/",page);
Modified: nutch/branches/2.x/src/test/org/apache/nutch/storage/TestGoraStorage.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/storage/TestGoraStorage.java?rev=1594812&r1=1594811&r2=1594812&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/storage/TestGoraStorage.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/storage/TestGoraStorage.java Thu May 15 08:10:07 2014
@@ -16,16 +16,6 @@
******************************************************************************/
package org.apache.nutch.storage;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-import java.util.concurrent.Callable;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.Future;
-
import org.apache.avro.util.Utf8;
import org.apache.commons.io.IOUtils;
import org.apache.gora.query.Result;
@@ -34,12 +24,23 @@ import org.apache.hadoop.conf.Configurat
import org.apache.nutch.util.AbstractNutchTest;
import org.apache.nutch.util.CrawlTestUtil;
import org.hsqldb.Server;
-import org.junit.Ignore;
-
import org.junit.After;
import org.junit.Before;
+import org.junit.Ignore;
import org.junit.Test;
-import static org.junit.Assert.*;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
/**
* Tests basic Gora functionality by writing and reading webpages.
@@ -71,7 +72,7 @@ public class TestGoraStorage extends Abs
private static void readWrite(String id, DataStore<String, WebPage> store)
throws IOException, Exception {
- WebPage page = new WebPage();
+ WebPage page = WebPage.newBuilder().build();
int max = 1000;
for (int i = 0; i < max; i++) {
// store a page with title
Modified: nutch/branches/2.x/src/test/org/apache/nutch/util/CrawlTestUtil.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/util/CrawlTestUtil.java?rev=1594812&r1=1594811&r2=1594812&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/util/CrawlTestUtil.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/util/CrawlTestUtil.java Thu May 15 08:10:07 2014
@@ -16,14 +16,9 @@
*/
package org.apache.nutch.util;
-import java.io.IOException;
-import java.net.UnknownHostException;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+import org.apache.gora.query.Query;
+import org.apache.gora.query.Result;
+import org.apache.gora.store.DataStore;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
@@ -31,14 +26,19 @@ import org.apache.hadoop.fs.Path;
import org.apache.nutch.crawl.URLWebPage;
import org.apache.nutch.storage.Mark;
import org.apache.nutch.storage.WebPage;
-import org.apache.gora.query.Query;
-import org.apache.gora.query.Result;
-import org.apache.gora.store.DataStore;
import org.mortbay.jetty.Handler;
import org.mortbay.jetty.Server;
import org.mortbay.jetty.handler.DefaultHandler;
import org.mortbay.jetty.handler.HandlerList;
import org.mortbay.jetty.handler.ResourceHandler;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.net.UnknownHostException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
public class CrawlTestUtil {
@@ -121,7 +121,7 @@ public class CrawlTestUtil {
if (requiredMark != null && requiredMark.checkMark(page) == null)
continue;
- l.add(new URLWebPage(TableUtil.unreverseUrl(url), (WebPage)page.clone()));
+ l.add(new URLWebPage(TableUtil.unreverseUrl(url), WebPage.newBuilder(page).build()));
} catch (Exception e) {
e.printStackTrace();
}
Modified: nutch/branches/2.x/src/test/org/apache/nutch/util/TestEncodingDetector.java
URL: http://svn.apache.org/viewvc/nutch/branches/2.x/src/test/org/apache/nutch/util/TestEncodingDetector.java?rev=1594812&r1=1594811&r2=1594812&view=diff
==============================================================================
--- nutch/branches/2.x/src/test/org/apache/nutch/util/TestEncodingDetector.java (original)
+++ nutch/branches/2.x/src/test/org/apache/nutch/util/TestEncodingDetector.java Thu May 15 08:10:07 2014
@@ -16,16 +16,16 @@
*/
package org.apache.nutch.util;
-import java.io.UnsupportedEncodingException;
-import java.nio.ByteBuffer;
-
-import org.junit.Test;
-import static org.junit.Assert.*;
-
import org.apache.avro.util.Utf8;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.net.protocols.Response;
import org.apache.nutch.storage.WebPage;
+import org.junit.Test;
+
+import java.io.UnsupportedEncodingException;
+import java.nio.ByteBuffer;
+
+import static org.junit.Assert.assertEquals;
public class TestEncodingDetector {
private static Configuration conf = NutchConfiguration.create();
@@ -50,7 +50,7 @@ public class TestEncodingDetector {
// Content content;
String encoding;
- WebPage page = new WebPage();
+ WebPage page = WebPage.newBuilder().build();
page.setBaseUrl(new Utf8("http://www.example.com/"));
page.setContentType(new Utf8("text/plain"));
page.setContent(ByteBuffer.wrap(contentInOctets));
@@ -61,18 +61,18 @@ public class TestEncodingDetector {
// no information is available, so it should return default encoding
assertEquals("windows-1252", encoding.toLowerCase());
- page = new WebPage();
+ page = WebPage.newBuilder().build();
page.setBaseUrl(new Utf8("http://www.example.com/"));
page.setContentType(new Utf8("text/plain"));
page.setContent(ByteBuffer.wrap(contentInOctets));
- page.putToHeaders(EncodingDetector.CONTENT_TYPE_UTF8, new Utf8("text/plain; charset=UTF-16"));
+ page.getHeaders().put(EncodingDetector.CONTENT_TYPE_UTF8, new Utf8("text/plain; charset=UTF-16"));
detector = new EncodingDetector(conf);
detector.autoDetectClues(page, true);
encoding = detector.guessEncoding(page, "windows-1252");
assertEquals("utf-16", encoding.toLowerCase());
- page = new WebPage();
+ page = WebPage.newBuilder().build();
page.setBaseUrl(new Utf8("http://www.example.com/"));
page.setContentType(new Utf8("text/plain"));
page.setContent(ByteBuffer.wrap(contentInOctets));
@@ -85,11 +85,11 @@ public class TestEncodingDetector {
// enable autodetection
conf.setInt(EncodingDetector.MIN_CONFIDENCE_KEY, 50);
- page = new WebPage();
+ page = WebPage.newBuilder().build();
page.setBaseUrl(new Utf8("http://www.example.com/"));
page.setContentType(new Utf8("text/plain"));
page.setContent(ByteBuffer.wrap(contentInOctets));
- page.putToMetadata(new Utf8(Response.CONTENT_TYPE), ByteBuffer.wrap("text/plain; charset=UTF-16".getBytes()));
+ page.getMetadata().put(new Utf8(Response.CONTENT_TYPE), ByteBuffer.wrap("text/plain; charset=UTF-16".getBytes()));
detector = new EncodingDetector(conf);
detector.autoDetectClues(page, true);