You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2012/01/14 18:24:49 UTC
svn commit: r1231532 - in /nutch/trunk/src/java/org/apache/nutch:
crawl/AbstractFetchSchedule.java crawl/FetchSchedule.java
parse/ParseResult.java
Author: lewismc
Date: Sat Jan 14 17:24:49 2012
New Revision: 1231532
URL: http://svn.apache.org/viewvc?rev=1231532&view=rev
Log:
NUTCH-1176 commit
Modified:
nutch/trunk/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java
nutch/trunk/src/java/org/apache/nutch/crawl/FetchSchedule.java
nutch/trunk/src/java/org/apache/nutch/parse/ParseResult.java
Modified: nutch/trunk/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java?rev=1231532&r1=1231531&r2=1231532&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java Sat Jan 14 17:24:49 2012
@@ -26,7 +26,7 @@ import org.apache.nutch.crawl.CrawlDatum
/**
* This class provides common methods for implementations of
- * {@link FetchSchedule}.
+ * <code>FetchSchedule</code>.
*
* @author Andrzej Bialecki
*/
@@ -98,9 +98,9 @@ public abstract class AbstractFetchSched
* @param datum datum instance to be adjusted.
*
* @return adjusted page information, including all original information.
- * NOTE: this may be a different instance than {@see datum}, but
+ * NOTE: this may be a different instance than @see CrawlDatum, but
* implementations should make sure that it contains at least all
- * information from {@see datum}.
+ * information from @see CrawlDatum.
*/
public CrawlDatum setPageGoneSchedule(Text url, CrawlDatum datum,
long prevFetchTime, long prevModifiedTime, long fetchTime) {
@@ -129,9 +129,9 @@ public abstract class AbstractFetchSched
* @param fetchTime current fetch time.
*
* @return adjusted page information, including all original information.
- * NOTE: this may be a different instance than {@see datum}, but
+ * NOTE: this may be a different instance than @see CrawlDatum, but
* implementations should make sure that it contains at least all
- * information from {@see datum}.
+ * information from @see CrawlDatum.
*/
public CrawlDatum setPageRetrySchedule(Text url, CrawlDatum datum,
long prevFetchTime, long prevModifiedTime, long fetchTime) {
@@ -154,7 +154,7 @@ public abstract class AbstractFetchSched
* guarantee that the page will be fetched, it just allows it to be
* included in the further selection process based on scores. The default
* implementation checks <code>fetchTime</code>, if it is higher than the
- * {@param curTime} it returns false, and true otherwise. It will also
+ * <code>curTime</code> it returns false, and true otherwise. It will also
* check that fetchTime is not too remote (more than <code>maxInterval</code>),
* in which case it lowers the interval and returns true.
*
Modified: nutch/trunk/src/java/org/apache/nutch/crawl/FetchSchedule.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/FetchSchedule.java?rev=1231532&r1=1231531&r2=1231532&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/FetchSchedule.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/FetchSchedule.java Sat Jan 14 17:24:49 2012
@@ -47,9 +47,9 @@ public interface FetchSchedule extends C
* @param datum datum instance to be initialized.
*
* @return adjusted page information, including all original information.
- * NOTE: this may be a different instance than {@see datum}, but
+ * NOTE: this may be a different instance than @see CrawlDatum, but
* implementations should make sure that it contains at least all
- * information from {@see datum}.
+ * information from @see CrawlDatum.
*/
public CrawlDatum initializeSchedule(Text url, CrawlDatum datum);
@@ -69,11 +69,11 @@ public interface FetchSchedule extends C
* @param prevModifiedTime previous value of modifiedTime, or 0 if not available.
*
* @param fetchTime the latest time, when the page was recently re-fetched. Most FetchSchedule
- * implementations should update the value in {@see datum} to something greater than this value.
+ * implementations should update the value in @see CrawlDatum to something greater than this value.
*
* @param modifiedTime last time the content was modified. This information comes from
* the protocol implementations, or is set to < 0 if not available. Most FetchSchedule
- * implementations should update the value in {@see datum} to this value.
+ * implementations should update the value in @see CrawlDatum to this value.
*
* @param state if {@link #STATUS_MODIFIED}, then the content is considered to be "changed" before the
* <code>fetchTime</code>, if {@link #STATUS_NOTMODIFIED} then the content is known to be unchanged.
@@ -82,8 +82,8 @@ public interface FetchSchedule extends C
* are free to follow a sensible default behavior.
*
* @return adjusted page information, including all original information. NOTE: this may
- * be a different instance than {@see datum}, but implementations should make sure that
- * it contains at least all information from {@see datum}.
+ * be a different instance than @see CrawlDatum, but implementations should make sure that
+ * it contains at least all information from @see CrawlDatum.
*/
public CrawlDatum setFetchSchedule(Text url, CrawlDatum datum,
long prevFetchTime, long prevModifiedTime,
@@ -100,9 +100,9 @@ public interface FetchSchedule extends C
* @param datum datum instance to be adjusted.
*
* @return adjusted page information, including all original information.
- * NOTE: this may be a different instance than {@see datum}, but
+ * NOTE: this may be a different instance than @see CrawlDatum, but
* implementations should make sure that it contains at least all
- * information from {@see datum}.
+ * information from @see CrawlDatum.
*/
public CrawlDatum setPageGoneSchedule(Text url, CrawlDatum datum,
long prevFetchTime, long prevModifiedTime, long fetchTime);
@@ -124,9 +124,9 @@ public interface FetchSchedule extends C
* @param fetchTime current fetch time.
*
* @return adjusted page information, including all original information.
- * NOTE: this may be a different instance than {@see datum}, but
+ * NOTE: this may be a different instance than @see CrawlDatum, but
* implementations should make sure that it contains at least all
- * information from {@see datum}.
+ * information from @see CrawlDatum.
*/
public CrawlDatum setPageRetrySchedule(Text url, CrawlDatum datum,
long prevFetchTime, long prevModifiedTime, long fetchTime);
@@ -143,7 +143,7 @@ public interface FetchSchedule extends C
* guarantee that the page will be fetched, it just allows it to be
* included in the further selection process based on scores. The default
* implementation checks <code>fetchTime</code>, if it is higher than the
- * {@see curTime} it returns false, and true otherwise. It will also
+ * <code>curTime</code> it returns false, and true otherwise. It will also
* check that fetchTime is not too remote (more than <code>maxInterval</code>),
* in which case it lowers the interval and returns true.
*
@@ -172,9 +172,9 @@ public interface FetchSchedule extends C
* time is set.
*
* @return adjusted page information, including all original information.
- * NOTE: this may be a different instance than {@see datum}, but
+ * NOTE: this may be a different instance than @see CrawlDatum, but
* implementations should make sure that it contains at least all
- * information from {@see datum}.
+ * information from @see CrawlDatum.
*/
public CrawlDatum forceRefetch(Text url, CrawlDatum datum, boolean asap);
}
Modified: nutch/trunk/src/java/org/apache/nutch/parse/ParseResult.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/parse/ParseResult.java?rev=1231532&r1=1231531&r2=1231532&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/parse/ParseResult.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/parse/ParseResult.java Sat Jan 14 17:24:49 2012
@@ -53,10 +53,10 @@ public class ParseResult implements Iter
/**
* Convenience method for obtaining {@link ParseResult} from a single
- * {@link Parse} output.
- * @param url canonical url
- * @param parse single parse output
- * @return result containing the single parse output
+ * <code>Parse</code> output.
+ * @param url canonical url.
+ * @param parse single parse output.
+ * @return result containing the single parse output.
*/
public static ParseResult createParseResult(String url, Parse parse) {
ParseResult parseResult = new ParseResult(url);
@@ -126,7 +126,7 @@ public class ParseResult implements Iter
/**
* Remove all results where status is not successful (as determined
- * by {@link ParseStatus#isSuccess()}). Note that effects of this operation
+ * by <code>ParseStatus#isSuccess()</code>). Note that effects of this operation
* cannot be reversed.
*/
public void filter() {
@@ -142,7 +142,7 @@ public class ParseResult implements Iter
/**
* A convenience method which returns true only if all parses are successful.
- * Parse success is determined by {@link ParseStatus#isSuccess()}
+ * Parse success is determined by <code>ParseStatus#isSuccess()</code>.
*/
public boolean isSuccess() {
for(Iterator<Entry<Text, Parse>> i = iterator(); i.hasNext();) {