You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by sn...@apache.org on 2023/03/06 10:46:32 UTC
[nutch] branch master updated: NUTCH-2972 Javadoc build fails using JDK 17 - fix Javadoc issues when building with JDK 17
This is an automated email from the ASF dual-hosted git repository.
snagel pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git
The following commit(s) were added to refs/heads/master by this push:
new a92878df1 NUTCH-2972 Javadoc build fails using JDK 17 - fix Javadoc issues when building with JDK 17
a92878df1 is described below
commit a92878df1ea586057dc8bc7e9ade376a9b8edc20
Author: Sebastian Nagel <sn...@apache.org>
AuthorDate: Fri Feb 24 17:16:27 2023 +0100
NUTCH-2972 Javadoc build fails using JDK 17
- fix Javadoc issues when building with JDK 17
---
src/java/org/apache/nutch/segment/SegmentMerger.java | 14 ++++++++------
src/java/org/apache/nutch/tools/arc/ArcRecordReader.java | 16 +++++++---------
.../apache/nutch/urlfilter/suffix/SuffixURLFilter.java | 8 +++++---
3 files changed, 20 insertions(+), 18 deletions(-)
diff --git a/src/java/org/apache/nutch/segment/SegmentMerger.java b/src/java/org/apache/nutch/segment/SegmentMerger.java
index 056df3c88..6bb90e472 100644
--- a/src/java/org/apache/nutch/segment/SegmentMerger.java
+++ b/src/java/org/apache/nutch/segment/SegmentMerger.java
@@ -76,7 +76,9 @@ import org.apache.nutch.util.NutchJob;
* <p>
* Also, it's possible to slice the resulting segment into chunks of fixed size.
* </p>
- * <h3>Important Notes</h3> <h4>Which parts are merged?</h4>
+ * <section>
+ * <h2>Important Notes</h2>
+ * <h3>Which parts are merged?</h3>
* <p>
* It doesn't make sense to merge data from segments, which are at different
* stages of processing (e.g. one unfetched segment, one fetched but not parsed,
@@ -87,14 +89,14 @@ import org.apache.nutch.util.NutchJob;
* fall back to just merging fetchlists, and it will skip all other data from
* all segments.
* </p>
- * <h4>Merging fetchlists</h4>
+ * <h3>Merging fetchlists</h3>
* <p>
* Merging segments, which contain just fetchlists (i.e. prior to fetching) is
* not recommended, because this tool (unlike the
* {@link org.apache.nutch.crawl.Generator}) doesn't ensure that fetchlist parts
* for each map task are disjoint.
* </p>
- * <h4>Duplicate content</h4>
+ * <h3>Duplicate content</h3>
* Merging segments removes older content whenever possible (see below).
* However, this is NOT the same as de-duplication, which in addition removes
* identical content found at different URL-s. In other words, running
@@ -108,15 +110,15 @@ import org.apache.nutch.util.NutchJob;
* segments be named in an increasing lexicographic order as their creation time
* increases.
* </p>
- * <h4>Merging and indexes</h4>
+ * <h3>Merging and indexes</h3>
* <p>
* Merged segment gets a different name. Since Indexer embeds segment names in
* indexes, any indexes originally created for the input segments will NOT work
* with the merged segment. Newly created merged segment(s) need to be indexed
* afresh. This tool doesn't use existing indexes in any way, so if you plan to
* merge segments you don't have to index them prior to merging.
- *
- * @author Andrzej Bialecki
+ * </p>
+ * </section>
*/
public class SegmentMerger extends Configured implements Tool{
private static final Logger LOG = LoggerFactory
diff --git a/src/java/org/apache/nutch/tools/arc/ArcRecordReader.java b/src/java/org/apache/nutch/tools/arc/ArcRecordReader.java
index 0a93947e4..b514a63fc 100644
--- a/src/java/org/apache/nutch/tools/arc/ArcRecordReader.java
+++ b/src/java/org/apache/nutch/tools/arc/ArcRecordReader.java
@@ -38,19 +38,17 @@ import org.apache.hadoop.util.ReflectionUtils;
/**
* The <code>ArcRecordReader</code> class provides a record reader which reads
* records from arc files.
- * <p>
+ *
* Arc files are essentially tars of gzips. Each record in an arc file is a
* compressed gzip. Multiple records are concatenated together to form a
- * complete arc.</p>
- * <p>For more information on the arc file format
- * @see <a href='http://www.archive.org/web/researcher/ArcFileFormat.php'>ArcFileFormat</a>.
- * </p>
+ * complete arc.
*
- * <p>
- * Arc files are used by the internet archive and grub projects.
- * </p>
+ * For more information on the arc file format, see
+ * <a href='http://www.archive.org/web/researcher/ArcFileFormat.php'>ArcFileFormat</a>.
+ *
+ * Arc files are used by the Internet Archive and grub projects.
*
- * @see <a href='http://www.archive.org/'>archive.org</a>
+ * @see <a href='https://www.archive.org/'>archive.org</a>
* @see <a href='http://www.grub.org/'>grub.org</a>
*/
public class ArcRecordReader extends RecordReader<Text, BytesWritable> {
diff --git a/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java b/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java
index dd8605f79..5edf5fc38 100644
--- a/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java
+++ b/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java
@@ -78,6 +78,9 @@ import java.net.MalformedURLException;
* expressions, it only accepts literal suffixes. I.e. a suffix "+*.jpg" is most
* probably wrong, you should use "+.jpg" instead.
* </p>
+ *
+ * <section>
+ * <h2>Examples</h2>
* <h3>Example 1</h3>
* <p>
* The configuration shown below will accept all URLs with '.html' or '.htm'
@@ -96,7 +99,7 @@ import java.net.MalformedURLException;
* .htm
* </pre>
*
- * <h4>Example 2</h4>
+ * <h3>Example 2</h3>
* <p>
* The configuration shown below will accept all URLs except common graphical
* formats.
@@ -115,8 +118,7 @@ import java.net.MalformedURLException;
* .jpeg
* .bmp
* </pre>
- *
- * @author Andrzej Bialecki
+ * </section>
*/
public class SuffixURLFilter implements URLFilter {