You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by le...@apache.org on 2012/01/14 18:24:49 UTC

svn commit: r1231532 - in /nutch/trunk/src/java/org/apache/nutch: crawl/AbstractFetchSchedule.java crawl/FetchSchedule.java parse/ParseResult.java

Author: lewismc
Date: Sat Jan 14 17:24:49 2012
New Revision: 1231532

URL: http://svn.apache.org/viewvc?rev=1231532&view=rev
Log:
NUTCH-1176 commit

Modified:
    nutch/trunk/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java
    nutch/trunk/src/java/org/apache/nutch/crawl/FetchSchedule.java
    nutch/trunk/src/java/org/apache/nutch/parse/ParseResult.java

Modified: nutch/trunk/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java?rev=1231532&r1=1231531&r2=1231532&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/AbstractFetchSchedule.java Sat Jan 14 17:24:49 2012
@@ -26,7 +26,7 @@ import org.apache.nutch.crawl.CrawlDatum
 
 /**
  * This class provides common methods for implementations of
- * {@link FetchSchedule}.
+ * <code>FetchSchedule</code>.
  * 
  * @author Andrzej Bialecki
  */
@@ -98,9 +98,9 @@ public abstract class AbstractFetchSched
    * @param datum datum instance to be adjusted.
    *
    * @return adjusted page information, including all original information.
-   * NOTE: this may be a different instance than {@see datum}, but
+   * NOTE: this may be a different instance than @see CrawlDatum, but
    * implementations should make sure that it contains at least all
-   * information from {@see datum}.
+   * information from @see CrawlDatum.
    */
   public CrawlDatum setPageGoneSchedule(Text url, CrawlDatum datum,
           long prevFetchTime, long prevModifiedTime, long fetchTime) {
@@ -129,9 +129,9 @@ public abstract class AbstractFetchSched
    * @param fetchTime current fetch time.
    *
    * @return adjusted page information, including all original information.
-   * NOTE: this may be a different instance than {@see datum}, but
+   * NOTE: this may be a different instance than @see CrawlDatum, but
    * implementations should make sure that it contains at least all
-   * information from {@see datum}.
+   * information from @see CrawlDatum.
    */
   public CrawlDatum setPageRetrySchedule(Text url, CrawlDatum datum,
           long prevFetchTime, long prevModifiedTime, long fetchTime) {
@@ -154,7 +154,7 @@ public abstract class AbstractFetchSched
    * guarantee that the page will be fetched, it just allows it to be
    * included in the further selection process based on scores. The default
    * implementation checks <code>fetchTime</code>, if it is higher than the
-   * {@param curTime} it returns false, and true otherwise. It will also
+   * <code>curTime</code> it returns false, and true otherwise. It will also
    * check that fetchTime is not too remote (more than <code>maxInterval</code>),
    * in which case it lowers the interval and returns true.
    *

Modified: nutch/trunk/src/java/org/apache/nutch/crawl/FetchSchedule.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/FetchSchedule.java?rev=1231532&r1=1231531&r2=1231532&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/FetchSchedule.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/FetchSchedule.java Sat Jan 14 17:24:49 2012
@@ -47,9 +47,9 @@ public interface FetchSchedule extends C
    * @param datum datum instance to be initialized.
    *
    * @return adjusted page information, including all original information.
-   * NOTE: this may be a different instance than {@see datum}, but
+   * NOTE: this may be a different instance than @see CrawlDatum, but
    * implementations should make sure that it contains at least all
-   * information from {@see datum}.
+   * information from @see CrawlDatum.
    */
   public CrawlDatum initializeSchedule(Text url, CrawlDatum datum);
   
@@ -69,11 +69,11 @@ public interface FetchSchedule extends C
    * @param prevModifiedTime previous value of modifiedTime, or 0 if not available.
    *
    * @param fetchTime the latest time, when the page was recently re-fetched. Most FetchSchedule
-   * implementations should update the value in {@see datum} to something greater than this value.
+   * implementations should update the value in @see CrawlDatum to something greater than this value.
    *
    * @param modifiedTime last time the content was modified. This information comes from
    * the protocol implementations, or is set to < 0 if not available. Most FetchSchedule
-   * implementations should update the value in {@see datum} to this value.
+   * implementations should update the value in @see CrawlDatum to this value.
    *
    * @param state if {@link #STATUS_MODIFIED}, then the content is considered to be "changed" before the
    * <code>fetchTime</code>, if {@link #STATUS_NOTMODIFIED} then the content is known to be unchanged.
@@ -82,8 +82,8 @@ public interface FetchSchedule extends C
    * are free to follow a sensible default behavior.
    *
    * @return adjusted page information, including all original information. NOTE: this may
-   * be a different instance than {@see datum}, but implementations should make sure that
-   * it contains at least all information from {@see datum}.
+   * be a different instance than @see CrawlDatum, but implementations should make sure that
+   * it contains at least all information from @see CrawlDatum.
    */
   public CrawlDatum setFetchSchedule(Text url, CrawlDatum datum,
           long prevFetchTime, long prevModifiedTime,
@@ -100,9 +100,9 @@ public interface FetchSchedule extends C
    * @param datum datum instance to be adjusted.
    *
    * @return adjusted page information, including all original information.
-   * NOTE: this may be a different instance than {@see datum}, but
+   * NOTE: this may be a different instance than @see CrawlDatum, but
    * implementations should make sure that it contains at least all
-   * information from {@see datum}.
+   * information from @see CrawlDatum.
    */
   public CrawlDatum setPageGoneSchedule(Text url, CrawlDatum datum,
           long prevFetchTime, long prevModifiedTime, long fetchTime);
@@ -124,9 +124,9 @@ public interface FetchSchedule extends C
    * @param fetchTime current fetch time.
    *
    * @return adjusted page information, including all original information.
-   * NOTE: this may be a different instance than {@see datum}, but
+   * NOTE: this may be a different instance than @see CrawlDatum, but
    * implementations should make sure that it contains at least all
-   * information from {@see datum}.
+   * information from @see CrawlDatum.
    */
   public CrawlDatum setPageRetrySchedule(Text url, CrawlDatum datum,
           long prevFetchTime, long prevModifiedTime, long fetchTime);
@@ -143,7 +143,7 @@ public interface FetchSchedule extends C
    * guarantee that the page will be fetched, it just allows it to be
    * included in the further selection process based on scores. The default
    * implementation checks <code>fetchTime</code>, if it is higher than the
-   * {@see curTime} it returns false, and true otherwise. It will also
+   * <code>curTime</code> it returns false, and true otherwise. It will also
    * check that fetchTime is not too remote (more than <code>maxInterval</code>),
    * in which case it lowers the interval and returns true.
    *
@@ -172,9 +172,9 @@ public interface FetchSchedule extends C
    * time is set.
    *
    * @return adjusted page information, including all original information.
-   * NOTE: this may be a different instance than {@see datum}, but
+   * NOTE: this may be a different instance than @see CrawlDatum, but
    * implementations should make sure that it contains at least all
-   * information from {@see datum}.
+   * information from @see CrawlDatum.
    */
   public CrawlDatum forceRefetch(Text url, CrawlDatum datum, boolean asap);
 }

Modified: nutch/trunk/src/java/org/apache/nutch/parse/ParseResult.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/parse/ParseResult.java?rev=1231532&r1=1231531&r2=1231532&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/parse/ParseResult.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/parse/ParseResult.java Sat Jan 14 17:24:49 2012
@@ -53,10 +53,10 @@ public class ParseResult implements Iter
   
   /**
    * Convenience method for obtaining {@link ParseResult} from a single
-   * {@link Parse} output.
-   * @param url canonical url
-   * @param parse single parse output
-   * @return result containing the single parse output
+   * <code>Parse</code> output.
+   * @param url canonical url.
+   * @param parse single parse output.
+   * @return result containing the single parse output.
    */
   public static ParseResult createParseResult(String url, Parse parse) {
     ParseResult parseResult = new ParseResult(url);
@@ -126,7 +126,7 @@ public class ParseResult implements Iter
   
   /**
    * Remove all results where status is not successful (as determined
-   * by {@link ParseStatus#isSuccess()}). Note that effects of this operation
+   * by <code>ParseStatus#isSuccess()</code>). Note that effects of this operation
    * cannot be reversed.
    */
   public void filter() {
@@ -142,7 +142,7 @@ public class ParseResult implements Iter
 
   /**
    * A convenience method which returns true only if all parses are successful.
-   * Parse success is determined by {@link ParseStatus#isSuccess()}
+   * Parse success is determined by <code>ParseStatus#isSuccess()</code>.
    */
   public boolean isSuccess() {
     for(Iterator<Entry<Text, Parse>> i = iterator(); i.hasNext();) {