You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by te...@apache.org on 2013/05/21 03:19:27 UTC
svn commit: r1484634 [1/2] - in /nutch/trunk: ./
src/java/org/apache/nutch/crawl/ src/java/org/apache/nutch/fetcher/
src/java/org/apache/nutch/indexer/ src/java/org/apache/nutch/metadata/
src/java/org/apache/nutch/net/ src/java/org/apache/nutch/parse/ ...
Author: tejasp
Date: Tue May 21 01:19:26 2013
New Revision: 1484634
URL: http://svn.apache.org/r1484634
Log:
NUTCH-1249 and NUTCH-1275 : Resolve all issues flagged up by adding javac -Xlint argument
Modified:
nutch/trunk/CHANGES.txt
nutch/trunk/build.xml
nutch/trunk/src/java/org/apache/nutch/crawl/FetchScheduleFactory.java
nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java
nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java
nutch/trunk/src/java/org/apache/nutch/crawl/LinkDbReader.java
nutch/trunk/src/java/org/apache/nutch/crawl/MapWritable.java
nutch/trunk/src/java/org/apache/nutch/crawl/NutchWritable.java
nutch/trunk/src/java/org/apache/nutch/crawl/SignatureComparator.java
nutch/trunk/src/java/org/apache/nutch/crawl/SignatureFactory.java
nutch/trunk/src/java/org/apache/nutch/fetcher/OldFetcher.java
nutch/trunk/src/java/org/apache/nutch/indexer/NutchField.java
nutch/trunk/src/java/org/apache/nutch/metadata/HttpHeaders.java
nutch/trunk/src/java/org/apache/nutch/metadata/Metadata.java
nutch/trunk/src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java
nutch/trunk/src/java/org/apache/nutch/net/URLNormalizers.java
nutch/trunk/src/java/org/apache/nutch/parse/HTMLMetaTags.java
nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java
nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java
nutch/trunk/src/java/org/apache/nutch/plugin/Extension.java
nutch/trunk/src/java/org/apache/nutch/plugin/PluginDescriptor.java
nutch/trunk/src/java/org/apache/nutch/plugin/PluginRepository.java
nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java
nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/LinkRank.java
nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/LoopReader.java
nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/Loops.java
nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java
nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/NodeReader.java
nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/ScoreUpdater.java
nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java
nutch/trunk/src/java/org/apache/nutch/segment/SegmentMergeFilter.java
nutch/trunk/src/java/org/apache/nutch/segment/SegmentMergeFilters.java
nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java
nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java
nutch/trunk/src/java/org/apache/nutch/tools/FreeGenerator.java
nutch/trunk/src/java/org/apache/nutch/tools/ResolveUrls.java
nutch/trunk/src/java/org/apache/nutch/tools/proxy/SegmentHandler.java
nutch/trunk/src/java/org/apache/nutch/util/GenericWritableConfigurable.java
nutch/trunk/src/java/org/apache/nutch/util/PrefixStringMatcher.java
nutch/trunk/src/java/org/apache/nutch/util/SuffixStringMatcher.java
nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java
nutch/trunk/src/plugin/lib-regex-filter/src/test/org/apache/nutch/urlfilter/api/RegexURLFilterBaseTest.java
nutch/trunk/src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagParser.java
nutch/trunk/src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/ExtParser.java
nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMBuilder.java
nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java
nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java
nutch/trunk/src/plugin/parse-swf/src/test/org/apache/nutch/parse/swf/TestSWFParser.java
nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/DOMBuilder.java
nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/TikaParser.java
nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java
nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java
nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java
nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java
nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java
nutch/trunk/src/plugin/protocol-httpclient/src/test/org/apache/nutch/protocol/httpclient/TestProtocolHttpClient.java
nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java
nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java
nutch/trunk/src/test/org/apache/nutch/crawl/TestCrawlDbMerger.java
nutch/trunk/src/test/org/apache/nutch/crawl/TestLinkDbMerger.java
nutch/trunk/src/test/org/apache/nutch/plugin/TestPluginSystem.java
nutch/trunk/src/test/org/apache/nutch/segment/TestSegmentMerger.java
Modified: nutch/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Tue May 21 01:19:26 2013
@@ -2,6 +2,8 @@ Nutch Change Log
(trunk): Current Development
+* NUTCH-1249 and NUTCH-1275 : Resolve all issues flagged up by adding javac -Xlint argument (tejasp)
+
* NUTCH-1053 Parsing of RSS feeds fails (tejasp)
* Added crawler-commons dependency in pom.xml (tejasp)
Modified: nutch/trunk/build.xml
URL: http://svn.apache.org/viewvc/nutch/trunk/build.xml?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/build.xml (original)
+++ nutch/trunk/build.xml Tue May 21 01:19:26 2013
@@ -57,6 +57,10 @@
</fileset>
</path>
+ <presetdef name="javac">
+ <javac includeantruntime="false" />
+ </presetdef>
+
<!-- ====================================================== -->
<!-- Stuff needed by all targets -->
<!-- ====================================================== -->
@@ -94,7 +98,7 @@
target="${javac.version}"
source="${javac.version}"
deprecation="${javac.deprecation}">
- <compilerarg value="-Xlint"/>
+ <compilerarg value="-Xlint:-path"/>
<classpath refid="classpath"/>
</javac>
</target>
@@ -341,7 +345,7 @@
target="${javac.version}"
source="${javac.version}"
deprecation="${javac.deprecation}">
- <compilerarg value="-Xlint"/>
+ <compilerarg value="-Xlint:-path"/>
<classpath refid="test.classpath"/>
</javac>
</target>
Modified: nutch/trunk/src/java/org/apache/nutch/crawl/FetchScheduleFactory.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/FetchScheduleFactory.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/FetchScheduleFactory.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/FetchScheduleFactory.java Tue May 21 01:19:26 2013
@@ -37,7 +37,7 @@ public class FetchScheduleFactory {
if (impl == null) {
try {
LOG.info("Using FetchSchedule impl: " + clazz);
- Class implClass = Class.forName(clazz);
+ Class<?> implClass = Class.forName(clazz);
impl = (FetchSchedule)implClass.newInstance();
impl.setConf(conf);
objectCache.setObject(clazz, impl);
Modified: nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/Generator.java Tue May 21 01:19:26 2013
@@ -368,6 +368,7 @@ public class Generator extends Configure
super(Text.class);
}
+ @SuppressWarnings("rawtypes" )
public int compare(WritableComparable a, WritableComparable b) {
Text url1 = (Text) a;
Text url2 = (Text) b;
Modified: nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/Injector.java Tue May 21 01:19:26 2013
@@ -60,7 +60,7 @@ public class Injector extends Configured
public static String nutchFixedFetchIntervalMDName = "nutch.fetchInterval.fixed";
/** Normalize and filter injected urls. */
- public static class InjectMapper implements Mapper<WritableComparable, Text, Text, CrawlDatum> {
+ public static class InjectMapper implements Mapper<WritableComparable<?>, Text, Text, CrawlDatum> {
private URLNormalizers urlNormalizers;
private int interval;
private float scoreInjected;
@@ -81,7 +81,7 @@ public class Injector extends Configured
public void close() {}
- public void map(WritableComparable key, Text value,
+ public void map(WritableComparable<?> key, Text value,
OutputCollector<Text, CrawlDatum> output, Reporter reporter)
throws IOException {
String url = value.toString(); // value is line of text
Modified: nutch/trunk/src/java/org/apache/nutch/crawl/LinkDbReader.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDbReader.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/LinkDbReader.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/LinkDbReader.java Tue May 21 01:19:26 2013
@@ -43,7 +43,7 @@ import java.io.Closeable;
public class LinkDbReader extends Configured implements Tool, Closeable {
public static final Logger LOG = LoggerFactory.getLogger(LinkDbReader.class);
- private static final Partitioner<WritableComparable, Writable> PARTITIONER = new HashPartitioner<WritableComparable, Writable>();
+ private static final Partitioner<WritableComparable<?>, Writable> PARTITIONER = new HashPartitioner<WritableComparable<?>, Writable>();
private FileSystem fs;
private Path directory;
Modified: nutch/trunk/src/java/org/apache/nutch/crawl/MapWritable.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/MapWritable.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/MapWritable.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/MapWritable.java Tue May 21 01:19:26 2013
@@ -79,9 +79,9 @@ public class MapWritable implements Writ
private ClassIdEntry fIdFirst;
- private static Map<Class, Byte> CLASS_ID_MAP = new HashMap<Class, Byte>();
+ private static Map<Class<?>, Byte> CLASS_ID_MAP = new HashMap<Class<?>, Byte>();
- private static Map<Byte, Class> ID_CLASS_MAP = new HashMap<Byte, Class>();
+ private static Map<Byte, Class<?>> ID_CLASS_MAP = new HashMap<Byte, Class<?>>();
static {
@@ -101,7 +101,7 @@ public class MapWritable implements Writ
}
- private static void addToMap(Class clazz, Byte byteId) {
+ private static void addToMap(Class<?> clazz, Byte byteId) {
CLASS_ID_MAP.put(clazz, byteId);
ID_CLASS_MAP.put(byteId, clazz);
}
@@ -338,7 +338,7 @@ public class MapWritable implements Writ
// read class-id map
fIdCount = in.readByte();
byte id;
- Class clazz;
+ Class<?> clazz;
for (int i = 0; i < fIdCount; i++) {
try {
id = in.readByte();
@@ -393,7 +393,7 @@ public class MapWritable implements Writ
}
}
- private byte addIdEntry(byte id, Class clazz) {
+ private byte addIdEntry(byte id, Class<?> clazz) {
if (fIdFirst == null) {
fIdFirst = fIdLast = new ClassIdEntry(id, clazz);
} else {
@@ -402,7 +402,7 @@ public class MapWritable implements Writ
return id;
}
- private byte getClassId(Class clazz) {
+ private byte getClassId(Class<?> clazz) {
Byte classId = CLASS_ID_MAP.get(clazz);
if (classId != null) {
return classId.byteValue();
@@ -438,8 +438,8 @@ public class MapWritable implements Writ
last = entry;
entry = entry.fNextEntry;
}
- Class keyClass = getClass(keyId);
- Class valueClass = getClass(valueId);
+ Class<?> keyClass = getClass(keyId);
+ Class<?> valueClass = getClass(valueId);
try {
return new KeyValueEntry((Writable) keyClass.newInstance(),
(Writable) valueClass.newInstance());
@@ -449,8 +449,8 @@ public class MapWritable implements Writ
}
- private Class getClass(final byte id) throws IOException {
- Class clazz = ID_CLASS_MAP.get(new Byte(id));
+ private Class<?> getClass(final byte id) throws IOException {
+ Class<?> clazz = ID_CLASS_MAP.get(new Byte(id));
if (clazz == null) {
ClassIdEntry entry = fIdFirst;
while (entry != null) {
@@ -502,14 +502,14 @@ public class MapWritable implements Writ
/** container for Id class tuples */
private class ClassIdEntry {
- public ClassIdEntry(byte id, Class clazz) {
+ public ClassIdEntry(byte id, Class<?> clazz) {
fId = id;
fclazz = clazz;
}
private byte fId;
- private Class fclazz;
+ private Class<?> fclazz;
private ClassIdEntry fNextIdEntry;
}
Modified: nutch/trunk/src/java/org/apache/nutch/crawl/NutchWritable.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/NutchWritable.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/NutchWritable.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/NutchWritable.java Tue May 21 01:19:26 2013
@@ -19,12 +19,13 @@ package org.apache.nutch.crawl;
import org.apache.hadoop.io.Writable;
import org.apache.nutch.util.GenericWritableConfigurable;
+@SuppressWarnings("unchecked")
public class NutchWritable extends GenericWritableConfigurable {
private static Class<? extends Writable>[] CLASSES = null;
static {
- CLASSES = new Class[] {
+ CLASSES = (Class<? extends Writable>[]) new Class<?>[] {
org.apache.hadoop.io.NullWritable.class,
org.apache.hadoop.io.BooleanWritable.class,
org.apache.hadoop.io.LongWritable.class,
Modified: nutch/trunk/src/java/org/apache/nutch/crawl/SignatureComparator.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/SignatureComparator.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/SignatureComparator.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/SignatureComparator.java Tue May 21 01:19:26 2013
@@ -19,7 +19,7 @@ package org.apache.nutch.crawl;
import java.util.Comparator;
-public class SignatureComparator implements Comparator {
+public class SignatureComparator implements Comparator<Object> {
public int compare(Object o1, Object o2) {
return _compare(o1, o2);
}
Modified: nutch/trunk/src/java/org/apache/nutch/crawl/SignatureFactory.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/SignatureFactory.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/SignatureFactory.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/SignatureFactory.java Tue May 21 01:19:26 2013
@@ -47,7 +47,7 @@ public class SignatureFactory {
if (LOG.isInfoEnabled()) {
LOG.info("Using Signature impl: " + clazz);
}
- Class implClass = Class.forName(clazz);
+ Class<?> implClass = Class.forName(clazz);
impl = (Signature)implClass.newInstance();
impl.setConf(conf);
objectCache.setObject(clazz, impl);
Modified: nutch/trunk/src/java/org/apache/nutch/fetcher/OldFetcher.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/fetcher/OldFetcher.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/fetcher/OldFetcher.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/fetcher/OldFetcher.java Tue May 21 01:19:26 2013
@@ -45,7 +45,7 @@ import org.apache.nutch.util.*;
/** The fetcher. Most of the work is done by plugins. */
-public class OldFetcher extends Configured implements Tool, MapRunnable<WritableComparable, Writable, Text, NutchWritable> {
+public class OldFetcher extends Configured implements Tool, MapRunnable<WritableComparable<?>, Writable, Text, NutchWritable> {
public static final Logger LOG = LoggerFactory.getLogger(OldFetcher.class);
@@ -55,12 +55,11 @@ public class OldFetcher extends Configur
public static final String PROTOCOL_REDIR = "protocol";
- public static class InputFormat extends SequenceFileInputFormat<WritableComparable, Writable> {
+ public static class InputFormat extends SequenceFileInputFormat<WritableComparable<?>, Writable> {
/** Don't split inputs, to keep things polite. */
public InputSplit[] getSplits(JobConf job, int nSplits)
throws IOException {
FileStatus[] files = listStatus(job);
- FileSystem fs = FileSystem.get(job);
InputSplit[] splits = new InputSplit[files.length];
for (int i = 0; i < files.length; i++) {
FileStatus cur = files[i];
@@ -71,7 +70,7 @@ public class OldFetcher extends Configur
}
}
- private RecordReader<WritableComparable, Writable> input;
+ private RecordReader<WritableComparable<?>, Writable> input;
private OutputCollector<Text, NutchWritable> output;
private Reporter reporter;
@@ -458,7 +457,7 @@ public class OldFetcher extends Configur
return conf.getBoolean("fetcher.store.content", true);
}
- public void run(RecordReader<WritableComparable, Writable> input, OutputCollector<Text, NutchWritable> output,
+ public void run(RecordReader<WritableComparable<?>, Writable> input, OutputCollector<Text, NutchWritable> output,
Reporter reporter) throws IOException {
this.input = input;
Modified: nutch/trunk/src/java/org/apache/nutch/indexer/NutchField.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/indexer/NutchField.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/indexer/NutchField.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/indexer/NutchField.java Tue May 21 01:19:26 2013
@@ -28,8 +28,8 @@ import java.util.List;
import org.apache.hadoop.io.*;
/**
- * This class represents a multi-valued field with a weight. Values are arbitrary
- * objects.
+ * This class represents a multi-valued field with a weight.
+ * Values are arbitrary objects.
*/
public class NutchField implements Writable {
private float weight;
@@ -44,7 +44,7 @@ public class NutchField implements Writa
public NutchField(Object value, float weight) {
this.weight = weight;
if (value instanceof Collection) {
- values.addAll((Collection<Object>)value);
+ values.addAll((Collection<?>)value);
} else {
values.add(value);
}
Modified: nutch/trunk/src/java/org/apache/nutch/metadata/HttpHeaders.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/metadata/HttpHeaders.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/metadata/HttpHeaders.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/metadata/HttpHeaders.java Tue May 21 01:19:26 2013
@@ -23,9 +23,6 @@ import org.apache.hadoop.io.Text;
*
* @see <a href="http://rfc-ref.org/RFC-TEXTS/2616/">Hypertext Transfer
* Protocol -- HTTP/1.1 (RFC 2616)</a>
- *
- * @author Chris Mattmann
- * @author Jérôme Charron
*/
public interface HttpHeaders {
Modified: nutch/trunk/src/java/org/apache/nutch/metadata/Metadata.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/metadata/Metadata.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/metadata/Metadata.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/metadata/Metadata.java Tue May 21 01:19:26 2013
@@ -27,13 +27,8 @@ import java.util.Properties;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
-
/**
* A multi-valued metadata container.
- *
- * @author Chris Mattmann
- * @author Jérôme Charron
- *
*/
public class Metadata implements Writable, CreativeCommons,
DublinCore, HttpHeaders, Nutch, Feed {
@@ -128,7 +123,7 @@ DublinCore, HttpHeaders, Nutch, Feed {
* @param properties properties to copy from
*/
public void setAll(Properties properties) {
- Enumeration names = properties.propertyNames();
+ Enumeration<?> names = properties.propertyNames();
while (names.hasMoreElements()) {
String name = (String) names.nextElement();
metadata.put(name, new String[]{properties.getProperty(name)});
Modified: nutch/trunk/src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/metadata/SpellCheckedMetadata.java Tue May 21 01:19:26 2013
@@ -52,9 +52,9 @@ public class SpellCheckedMetadata extend
// Uses following array to fill the metanames index and the
// metanames list.
- Class[] spellthese = {HttpHeaders.class};
+ Class<?>[] spellthese = {HttpHeaders.class};
- for (Class spellCheckedNames : spellthese) {
+ for (Class<?> spellCheckedNames : spellthese) {
for (Field field : spellCheckedNames.getFields()) {
int mods = field.getModifiers();
if (Modifier.isFinal(mods) && Modifier.isPublic(mods)
Modified: nutch/trunk/src/java/org/apache/nutch/net/URLNormalizers.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/net/URLNormalizers.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/net/URLNormalizers.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/net/URLNormalizers.java Tue May 21 01:19:26 2013
@@ -101,7 +101,7 @@ public final class URLNormalizers {
public static final Logger LOG = LoggerFactory.getLogger(URLNormalizers.class);
/* Empty extension list for caching purposes. */
- private final List<Extension> EMPTY_EXTENSION_LIST = Collections.EMPTY_LIST;
+ private final List<Extension> EMPTY_EXTENSION_LIST = Collections.<Extension>emptyList();
private final URLNormalizer[] EMPTY_NORMALIZERS = new URLNormalizer[0];
@@ -194,6 +194,7 @@ public final class URLNormalizers {
* empty list.
* @throws PluginRuntimeException
*/
+ @SuppressWarnings("unchecked")
private List<Extension> getExtensions(String scope) {
ObjectCache objectCache = ObjectCache.get(conf);
List<Extension> extensions =
Modified: nutch/trunk/src/java/org/apache/nutch/parse/HTMLMetaTags.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/parse/HTMLMetaTags.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/parse/HTMLMetaTags.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/parse/HTMLMetaTags.java Tue May 21 01:19:26 2013
@@ -188,7 +188,7 @@ public class HTMLMetaTags {
+ ", refreshHref=" + refreshHref + "\n"
);
sb.append(" * general tags:\n");
- Iterator it = generalTags.keySet().iterator();
+ Iterator<Object> it = generalTags.keySet().iterator();
while (it.hasNext()) {
String key = (String)it.next();
sb.append(" - " + key + "\t=\t" + generalTags.get(key) + "\n");
Modified: nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/parse/ParseSegment.java Tue May 21 01:19:26 2013
@@ -42,7 +42,7 @@ import java.util.Map.Entry;
/* Parse content in a segment. */
public class ParseSegment extends Configured implements Tool,
- Mapper<WritableComparable, Content, Text, ParseImpl>,
+ Mapper<WritableComparable<?>, Content, Text, ParseImpl>,
Reducer<Text, Writable, Text, Writable> {
public static final Logger LOG = LoggerFactory.getLogger(ParseSegment.class);
@@ -71,7 +71,7 @@ public class ParseSegment extends Config
private Text newKey = new Text();
- public void map(WritableComparable key, Content content,
+ public void map(WritableComparable<?> key, Content content,
OutputCollector<Text, ParseImpl> output, Reporter reporter)
throws IOException {
// convert on the fly from old UTF8 keys
Modified: nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/parse/ParserFactory.java Tue May 21 01:19:26 2013
@@ -48,7 +48,7 @@ public final class ParserFactory {
public static final String DEFAULT_PLUGIN = "*";
/** Empty extension list for caching purposes. */
- private final List EMPTY_EXTENSION_LIST = Collections.EMPTY_LIST;
+ private final List<Extension> EMPTY_EXTENSION_LIST = Collections.<Extension>emptyList();
private Configuration conf;
private ExtensionPoint extensionPoint;
@@ -57,9 +57,9 @@ public final class ParserFactory {
public ParserFactory(Configuration conf) {
this.conf = conf;
ObjectCache objectCache = ObjectCache.get(conf);
- this.extensionPoint = PluginRepository.get(conf).getExtensionPoint(
- Parser.X_POINT_ID);
+ this.extensionPoint = PluginRepository.get(conf).getExtensionPoint(Parser.X_POINT_ID);
this.parsePluginList = (ParsePluginList)objectCache.getObject(ParsePluginList.class.getName());
+
if (this.parsePluginList == null) {
this.parsePluginList = new ParsePluginsReader().parse(conf);
objectCache.setObject(ParsePluginList.class.getName(), this.parsePluginList);
@@ -121,8 +121,8 @@ public final class ParserFactory {
}
parsers = new Vector<Parser>(parserExts.size());
- for (Iterator i=parserExts.iterator(); i.hasNext(); ){
- Extension ext = (Extension) i.next();
+ for (Iterator<Extension> i = parserExts.iterator(); i.hasNext(); ){
+ Extension ext = i.next();
Parser p = null;
try {
//check to see if we've cached this parser instance yet
@@ -212,6 +212,7 @@ public final class ParserFactory {
* @return a list of extensions to be used for this contentType.
* If none, returns <code>null</code>.
*/
+ @SuppressWarnings("unchecked")
protected List<Extension> getExtensions(String contentType) {
ObjectCache objectCache = ObjectCache.get(conf);
@@ -411,5 +412,4 @@ public final class ParserFactory {
private Extension getExtensionFromAlias(Extension[] list, String id) {
return getExtension(list, parsePluginList.getAliases().get(id));
}
-
}
Modified: nutch/trunk/src/java/org/apache/nutch/plugin/Extension.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/plugin/Extension.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/plugin/Extension.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/plugin/Extension.java Tue May 21 01:19:26 2013
@@ -25,8 +25,6 @@ import org.apache.hadoop.conf.Configurab
* An <code>Extension</code> is a kind of listener descriptor that will be
* installed on a concrete <code>ExtensionPoint</code> that acts as kind of
* Publisher.
- *
- * @author joa23
*/
public class Extension {
private PluginDescriptor fDescriptor;
@@ -153,7 +151,7 @@ public class Extension {
synchronized (getId()) {
try {
PluginClassLoader loader = fDescriptor.getClassLoader();
- Class extensionClazz = loader.loadClass(getClazz());
+ Class<?> extensionClazz = loader.loadClass(getClazz());
// lazy loading of Plugin in case there is no instance of the plugin
// already.
this.pluginRepository.getPluginInstance(getDescriptor());
Modified: nutch/trunk/src/java/org/apache/nutch/plugin/PluginDescriptor.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/plugin/PluginDescriptor.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/plugin/PluginDescriptor.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/plugin/PluginDescriptor.java Tue May 21 01:19:26 2013
@@ -37,8 +37,6 @@ import org.apache.hadoop.conf.Configurat
* <code>ExtensionPoint</code> and <code>Extension</code>. To provide
* access to the meta data of a plugin via a descriptor allow a lazy loading
* mechanism.
- *
- * @author joa23
*/
public class PluginDescriptor {
private String fPluginPath;
@@ -47,7 +45,7 @@ public class PluginDescriptor {
private String fVersion;
private String fName;
private String fProviderName;
- private HashMap fMessages = new HashMap();
+ private HashMap<String, ResourceBundle> fMessages = new HashMap<String, ResourceBundle>();
private ArrayList<ExtensionPoint> fExtensionPoints = new ArrayList<ExtensionPoint>();
private ArrayList<String> fDependencies = new ArrayList<String>();
private ArrayList<URL> fExportedLibs = new ArrayList<URL>();
@@ -338,8 +336,7 @@ public class PluginDescriptor {
public String getResourceString(String pKey, Locale pLocale)
throws IOException {
if (fMessages.containsKey(pLocale.toString())) {
- ResourceBundle bundle = (ResourceBundle) fMessages
- .get(pLocale.toString());
+ ResourceBundle bundle = fMessages.get(pLocale.toString());
try {
return bundle.getString(pKey);
} catch (MissingResourceException e) {
Modified: nutch/trunk/src/java/org/apache/nutch/plugin/PluginRepository.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/plugin/PluginRepository.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/plugin/PluginRepository.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/plugin/PluginRepository.java Tue May 21 01:19:26 2013
@@ -39,8 +39,6 @@ import org.apache.nutch.util.NutchConfig
* descriptor represents all meta information about a plugin. So a plugin
* instance will be created later when it is required, this allow lazy plugin
* loading.
- *
- * @author joa23
*/
public class PluginRepository {
private static final WeakHashMap<String, PluginRepository> CACHE = new WeakHashMap<String, PluginRepository>();
@@ -267,8 +265,8 @@ public class PluginRepository {
// Suggested by Stefan Groschupf <sg...@media-style.com>
synchronized (pDescriptor) {
PluginClassLoader loader = pDescriptor.getClassLoader();
- Class pluginClass = loader.loadClass(pDescriptor.getPluginClass());
- Constructor constructor = pluginClass.getConstructor(new Class[] {
+ Class<?> pluginClass = loader.loadClass(pDescriptor.getPluginClass());
+ Constructor<?> constructor = pluginClass.getConstructor(new Class<?>[] {
PluginDescriptor.class, Configuration.class });
Plugin plugin = (Plugin) constructor.newInstance(new Object[] {
pDescriptor, this.conf });
@@ -400,7 +398,7 @@ public class PluginRepository {
}
ClassLoader cl = d.getClassLoader();
// args[1] - class name
- Class clazz = null;
+ Class<?> clazz = null;
try {
clazz = Class.forName(args[1], true, cl);
} catch (Exception e) {
@@ -410,7 +408,7 @@ public class PluginRepository {
}
Method m = null;
try {
- m = clazz.getMethod("main", new Class[] { args.getClass() });
+ m = clazz.getMethod("main", new Class<?>[] { args.getClass() });
} catch (Exception e) {
System.err.println("Could not find the 'main(String[])' method in class "
+ args[1] + ": " + e.getMessage());
Modified: nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/LinkDumper.java Tue May 21 01:19:26 2013
@@ -433,12 +433,17 @@ public class LinkDumper
throws Exception {
Options options = new Options();
- Option helpOpts = OptionBuilder.withArgName("help").withDescription(
- "show this help message").create("help");
- Option webGraphDbOpts = OptionBuilder.withArgName("webgraphdb").hasArg()
- .withDescription("the web graph database to use").create("webgraphdb");
+ OptionBuilder.withArgName("help");
+ OptionBuilder.withDescription("show this help message");
+ Option helpOpts = OptionBuilder.create("help");
options.addOption(helpOpts);
+
+ OptionBuilder.withArgName("webgraphdb");
+ OptionBuilder.hasArg();
+ OptionBuilder.withDescription("the web graph database to use");
+ Option webGraphDbOpts = OptionBuilder.create("webgraphdb");
options.addOption(webGraphDbOpts);
+
CommandLineParser parser = new GnuParser();
try {
Modified: nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/LinkRank.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/LinkRank.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/LinkRank.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/LinkRank.java Tue May 21 01:19:26 2013
@@ -287,12 +287,10 @@ public class LinkRank
implements Mapper<Text, Node, Text, LongWritable>,
Reducer<Text, LongWritable, Text, LongWritable> {
- private JobConf conf;
private static Text numNodes = new Text(NUM_NODES);
private static LongWritable one = new LongWritable(1L);
public void configure(JobConf conf) {
- this.conf = conf;
}
/**
@@ -678,11 +676,15 @@ public class LinkRank
throws Exception {
Options options = new Options();
- Option helpOpts = OptionBuilder.withArgName("help").withDescription(
- "show this help message").create("help");
- Option webgraphOpts = OptionBuilder.withArgName("webgraphdb").hasArg().withDescription(
- "the web graph db to use").create("webgraphdb");
+ OptionBuilder.withArgName("help");
+ OptionBuilder.withDescription("show this help message");
+ Option helpOpts = OptionBuilder.create("help");
options.addOption(helpOpts);
+
+ OptionBuilder.withArgName("webgraphdb");
+ OptionBuilder.hasArg();
+ OptionBuilder.withDescription("the web graph db to use");
+ Option webgraphOpts = OptionBuilder.create("webgraphdb");
options.addOption(webgraphOpts);
CommandLineParser parser = new GnuParser();
Modified: nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/LoopReader.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/LoopReader.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/LoopReader.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/LoopReader.java Tue May 21 01:19:26 2013
@@ -45,9 +45,7 @@ public class LoopReader extends Configur
private FileSystem fs;
private MapFile.Reader[] loopReaders;
- public LoopReader() {
-
- }
+ public LoopReader() { }
public LoopReader(Configuration conf) {
super(conf);
@@ -94,14 +92,21 @@ public class LoopReader extends Configur
throws Exception {
Options options = new Options();
- Option helpOpts = OptionBuilder.withArgName("help").withDescription(
- "show this help message").create("help");
- Option webGraphOpts = OptionBuilder.withArgName("webgraphdb").hasArg()
- .withDescription("the webgraphdb to use").create("webgraphdb");
- Option urlOpts = OptionBuilder.withArgName("url").hasOptionalArg()
- .withDescription("the url to dump").create("url");
+ OptionBuilder.withArgName("help");
+ OptionBuilder.withDescription("show this help message");
+ Option helpOpts = OptionBuilder.create("help");
options.addOption(helpOpts);
+
+ OptionBuilder.withArgName("webgraphdb");
+ OptionBuilder.hasArg();
+ OptionBuilder.withDescription("the webgraphdb to use");
+ Option webGraphOpts = OptionBuilder.create("webgraphdb");
options.addOption(webGraphOpts);
+
+ OptionBuilder.withArgName("url");
+ OptionBuilder.hasOptionalArg();
+ OptionBuilder.withDescription("the url to dump");
+ Option urlOpts = OptionBuilder.create("url");
options.addOption(urlOpts);
CommandLineParser parser = new GnuParser();
Modified: nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/Loops.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/Loops.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/Loops.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/Loops.java Tue May 21 01:19:26 2013
@@ -583,11 +583,15 @@ public class Loops
throws Exception {
Options options = new Options();
- Option helpOpts = OptionBuilder.withArgName("help").withDescription(
- "show this help message").create("help");
- Option webGraphDbOpts = OptionBuilder.withArgName("webgraphdb").hasArg().withDescription(
- "the web graph database to use").create("webgraphdb");
+ OptionBuilder.withArgName("help");
+ OptionBuilder.withDescription("show this help message");
+ Option helpOpts = OptionBuilder.create("help");
options.addOption(helpOpts);
+
+ OptionBuilder.withArgName("webgraphdb");
+ OptionBuilder.hasArg();
+ OptionBuilder.withDescription("the web graph database to use");
+ Option webGraphDbOpts = OptionBuilder.create("webgraphdb");
options.addOption(webGraphDbOpts);
CommandLineParser parser = new GnuParser();
Modified: nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java Tue May 21 01:19:26 2013
@@ -343,36 +343,57 @@ public class NodeDumper
throws Exception {
Options options = new Options();
- Option helpOpts = OptionBuilder.withArgName("help").withDescription(
- "show this help message").create("help");
- Option webGraphDbOpts = OptionBuilder.withArgName("webgraphdb").hasArg().withDescription(
- "the web graph database to use").create("webgraphdb");
- Option inlinkOpts = OptionBuilder.withArgName("inlinks").withDescription(
- "show highest inlinks").create("inlinks");
- Option outlinkOpts = OptionBuilder.withArgName("outlinks").withDescription(
- "show highest outlinks").create("outlinks");
- Option scoreOpts = OptionBuilder.withArgName("scores").withDescription(
- "show highest scores").create("scores");
- Option topNOpts = OptionBuilder.withArgName("topn").hasOptionalArg().withDescription(
- "show topN scores").create("topn");
- Option outputOpts = OptionBuilder.withArgName("output").hasArg().withDescription(
- "the output directory to use").create("output");
- Option effOpts = OptionBuilder.withArgName("asEff").withDescription(
- "Solr ExternalFileField compatible output format").create("asEff");
- Option groupOpts = OptionBuilder.hasArgs(2).withDescription(
- "group <host|domain> <sum|max>").create("group");
- Option sequenceFileOpts = OptionBuilder.withArgName("asSequenceFile").withDescription(
- "whether to output as a sequencefile").create("asSequenceFile");
-
+ OptionBuilder.withArgName("help");
+ OptionBuilder.withDescription("show this help message");
+ Option helpOpts = OptionBuilder.create("help");
options.addOption(helpOpts);
+
+ OptionBuilder.withArgName("webgraphdb");
+ OptionBuilder.hasArg();
+ OptionBuilder.withDescription("the web graph database to use");
+ Option webGraphDbOpts = OptionBuilder.create("webgraphdb");
options.addOption(webGraphDbOpts);
+
+ OptionBuilder.withArgName("inlinks");
+ OptionBuilder.withDescription("show highest inlinks");
+ Option inlinkOpts = OptionBuilder.create("inlinks");
options.addOption(inlinkOpts);
+
+ OptionBuilder.withArgName("outlinks");
+ OptionBuilder.withDescription("show highest outlinks");
+ Option outlinkOpts = OptionBuilder.create("outlinks");
options.addOption(outlinkOpts);
+
+ OptionBuilder.withArgName("scores");
+ OptionBuilder.withDescription("show highest scores");
+ Option scoreOpts = OptionBuilder.create("scores");
options.addOption(scoreOpts);
+
+ OptionBuilder.withArgName("topn");
+ OptionBuilder.hasOptionalArg();
+ OptionBuilder.withDescription("show topN scores");
+ Option topNOpts = OptionBuilder.create("topn");
options.addOption(topNOpts);
+
+ OptionBuilder.withArgName("output");
+ OptionBuilder.hasArg();
+ OptionBuilder.withDescription("the output directory to use");
+ Option outputOpts = OptionBuilder.create("output");
options.addOption(outputOpts);
+
+ OptionBuilder.withArgName("asEff");
+ OptionBuilder.withDescription("Solr ExternalFileField compatible output format");
+ Option effOpts = OptionBuilder.create("asEff");
options.addOption(effOpts);
+
+ OptionBuilder.hasArgs(2);
+ OptionBuilder.withDescription("group <host|domain> <sum|max>");
+ Option groupOpts = OptionBuilder.create("group");
options.addOption(groupOpts);
+
+ OptionBuilder.withArgName("asSequenceFile");
+ OptionBuilder.withDescription("whether to output as a sequencefile");
+ Option sequenceFileOpts = OptionBuilder.create("asSequenceFile");
options.addOption(sequenceFileOpts);
CommandLineParser parser = new GnuParser();
@@ -388,7 +409,6 @@ public class NodeDumper
String webGraphDb = line.getOptionValue("webgraphdb");
boolean inlinks = line.hasOption("inlinks");
boolean outlinks = line.hasOption("outlinks");
- boolean scores = line.hasOption("scores");
long topN = (line.hasOption("topn")
? Long.parseLong(line.getOptionValue("topn")) : Long.MAX_VALUE);
Modified: nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/NodeReader.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/NodeReader.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/NodeReader.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/NodeReader.java Tue May 21 01:19:26 2013
@@ -90,14 +90,21 @@ public class NodeReader extends Configur
throws Exception {
Options options = new Options();
- Option helpOpts = OptionBuilder.withArgName("help").withDescription(
- "show this help message").create("help");
- Option webGraphOpts = OptionBuilder.withArgName("webgraphdb").hasArg()
- .withDescription("the webgraphdb to use").create("webgraphdb");
- Option urlOpts = OptionBuilder.withArgName("url").hasOptionalArg()
- .withDescription("the url to dump").create("url");
+ OptionBuilder.withArgName("help");
+ OptionBuilder.withDescription("show this help message");
+ Option helpOpts = OptionBuilder.create("help");
options.addOption(helpOpts);
+
+ OptionBuilder.withArgName("webgraphdb");
+ OptionBuilder.hasArg();
+ OptionBuilder.withDescription("the webgraphdb to use");
+ Option webGraphOpts = OptionBuilder.create("webgraphdb");
options.addOption(webGraphOpts);
+
+ OptionBuilder.withArgName("url");
+ OptionBuilder.hasOptionalArg();
+ OptionBuilder.withDescription("the url to dump");
+ Option urlOpts = OptionBuilder.create("url");
options.addOption(urlOpts);
CommandLineParser parser = new GnuParser();
Modified: nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/ScoreUpdater.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/ScoreUpdater.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/ScoreUpdater.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/ScoreUpdater.java Tue May 21 01:19:26 2013
@@ -217,14 +217,21 @@ public class ScoreUpdater
throws Exception {
Options options = new Options();
- Option helpOpts = OptionBuilder.withArgName("help").withDescription(
- "show this help message").create("help");
- Option crawlDbOpts = OptionBuilder.withArgName("crawldb").hasArg().withDescription(
- "the crawldb to use").create("crawldb");
- Option webGraphOpts = OptionBuilder.withArgName("webgraphdb").hasArg().withDescription(
- "the webgraphdb to use").create("webgraphdb");
+ OptionBuilder.withArgName("help");
+ OptionBuilder.withDescription("show this help message");
+ Option helpOpts = OptionBuilder.create("help");
options.addOption(helpOpts);
+
+ OptionBuilder.withArgName("crawldb");
+ OptionBuilder.hasArg();
+ OptionBuilder.withDescription("the crawldb to use");
+ Option crawlDbOpts = OptionBuilder.create("crawldb");
options.addOption(crawlDbOpts);
+
+ OptionBuilder.withArgName("webgraphdb");
+ OptionBuilder.hasArg();
+ OptionBuilder.withDescription("the webgraphdb to use");
+ Option webGraphOpts = OptionBuilder.create("webgraphdb");
options.addOption(webGraphOpts);
CommandLineParser parser = new GnuParser();
Modified: nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java Tue May 21 01:19:26 2013
@@ -405,28 +405,13 @@ public class WebGraph
extends Configured
implements Mapper<Text, LinkDatum, Text, LinkDatum> {
- private JobConf conf;
private long timestamp;
/**
- * Default constructor.
- */
- public InlinkDb() {
- }
-
- /**
- * Configurable constructor.
- */
- public InlinkDb(Configuration conf) {
- setConf(conf);
- }
-
- /**
* Configures job. Sets timestamp for all Inlink LinkDatum objects to the
* current system time.
*/
public void configure(JobConf conf) {
- this.conf = conf;
timestamp = System.currentTimeMillis();
}
@@ -461,30 +446,12 @@ public class WebGraph
extends Configured
implements Reducer<Text, LinkDatum, Text, Node> {
- private JobConf conf;
-
- /**
- * Default constructor.
- */
- public NodeDb() {
- }
-
- /**
- * Configurable constructor.
- */
- public NodeDb(Configuration conf) {
- setConf(conf);
- }
-
/**
* Configures job.
*/
- public void configure(JobConf conf) {
- this.conf = conf;
- }
+ public void configure(JobConf conf) { }
- public void close() {
- }
+ public void close() { }
/**
* Counts the number of inlinks and outlinks for each url and sets a default
@@ -731,23 +698,37 @@ public class WebGraph
throws Exception {
Options options = new Options();
- Option helpOpts = OptionBuilder.withArgName("help").withDescription(
- "show this help message").create("help");
- Option webGraphDbOpts = OptionBuilder.withArgName("webgraphdb").hasArg().withDescription(
- "the web graph database to use").create("webgraphdb");
- Option segOpts = OptionBuilder.withArgName("segment").hasArgs().withDescription(
- "the segment(s) to use").create("segment");
- Option segDirOpts = OptionBuilder.withArgName("segmentDir").hasArgs().withDescription(
- "the segment directory to use").create("segmentDir");
- Option normalizeOpts = OptionBuilder.withArgName("normalize").withDescription(
- "whether to use URLNormalizers on the URL's in the segment").create("normalize");
- Option filterOpts = OptionBuilder.withArgName("filter").withDescription(
- "whether to use URLFilters on the URL's in the segment").create("filter");
+ OptionBuilder.withArgName("help");
+ OptionBuilder.withDescription("show this help message");
+ Option helpOpts = OptionBuilder.create("help");
options.addOption(helpOpts);
+
+ OptionBuilder.withArgName("webgraphdb");
+ OptionBuilder.hasArg();
+ OptionBuilder.withDescription("the web graph database to use");
+ Option webGraphDbOpts = OptionBuilder.create("webgraphdb");
options.addOption(webGraphDbOpts);
+
+ OptionBuilder.withArgName("segment");
+ OptionBuilder.hasArgs();
+ OptionBuilder.withDescription("the segment(s) to use");
+ Option segOpts = OptionBuilder.create("segment");
options.addOption(segOpts);
+
+ OptionBuilder.withArgName("segmentDir");
+ OptionBuilder.hasArgs();
+ OptionBuilder.withDescription("the segment directory to use");
+ Option segDirOpts = OptionBuilder.create("segmentDir");
options.addOption(segDirOpts);
+
+ OptionBuilder.withArgName("normalize");
+ OptionBuilder.withDescription("whether to use URLNormalizers on the URL's in the segment");
+ Option normalizeOpts = OptionBuilder.create("normalize");
options.addOption(normalizeOpts);
+
+ OptionBuilder.withArgName("filter");
+ OptionBuilder.withDescription("whether to use URLFilters on the URL's in the segment");
+ Option filterOpts = OptionBuilder.create("filter");
options.addOption(filterOpts);
CommandLineParser parser = new GnuParser();
Modified: nutch/trunk/src/java/org/apache/nutch/segment/SegmentMergeFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMergeFilter.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/segment/SegmentMergeFilter.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/segment/SegmentMergeFilter.java Tue May 21 01:19:26 2013
@@ -18,7 +18,7 @@ package org.apache.nutch.segment;
import java.util.Collection;
-import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.Text;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.parse.ParseData;
import org.apache.nutch.parse.ParseText;
@@ -41,7 +41,7 @@ public interface SegmentMergeFilter {
* @return <tt>true</tt> values for this <tt>key</tt> (URL) should be merged
* into the new segment.
*/
- public boolean filter(WritableComparable key, CrawlDatum generateData,
+ public boolean filter(Text key, CrawlDatum generateData,
CrawlDatum fetchData, CrawlDatum sigData, Content content,
ParseData parseData, ParseText parseText, Collection<CrawlDatum> linked);
}
Modified: nutch/trunk/src/java/org/apache/nutch/segment/SegmentMergeFilters.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMergeFilters.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/segment/SegmentMergeFilters.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/segment/SegmentMergeFilters.java Tue May 21 01:19:26 2013
@@ -21,7 +21,7 @@ import java.util.Collection;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.Text;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.net.URLFilter;
import org.apache.nutch.parse.ParseData;
@@ -65,16 +65,14 @@ public class SegmentMergeFilters {
* @return <tt>true</tt> values for this <tt>key</tt> (URL) should be merged
* into the new segment.
*/
- public boolean filter(WritableComparable key, CrawlDatum generateData,
+ public boolean filter(Text key, CrawlDatum generateData,
CrawlDatum fetchData, CrawlDatum sigData, Content content,
ParseData parseData, ParseText parseText, Collection<CrawlDatum> linked) {
for (SegmentMergeFilter filter : filters) {
if (!filter.filter(key, generateData, fetchData, sigData, content,
parseData, parseText, linked)) {
if (LOG.isTraceEnabled())
- LOG
- .trace("Key " + key + " dropped by "
- + filter.getClass().getName());
+ LOG.trace("Key " + key + " dropped by " + filter.getClass().getName());
return false;
}
}
Modified: nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/segment/SegmentMerger.java Tue May 21 01:19:26 2013
@@ -16,6 +16,7 @@
*/
package org.apache.nutch.segment;
+import java.io.Closeable;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
@@ -207,7 +208,7 @@ public class SegmentMerger extends Confi
MapFile.Writer pt_out = null;
SequenceFile.Writer g_out = null;
SequenceFile.Writer p_out = null;
- HashMap sliceWriters = new HashMap();
+ HashMap<String, Closeable> sliceWriters = new HashMap<String, Closeable>();
String segmentName = job.get("segment.merger.segmentName");
public void write(Text key, MetaWrapper wrapper) throws IOException {
@@ -288,7 +289,7 @@ public class SegmentMerger extends Confi
}
public void close(Reporter reporter) throws IOException {
- Iterator<Object> it = sliceWriters.values().iterator();
+ Iterator<Closeable> it = sliceWriters.values().iterator();
while (it.hasNext()) {
Object o = it.next();
if (o instanceof SequenceFile.Writer) {
Modified: nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/segment/SegmentReader.java Tue May 21 01:19:26 2013
@@ -80,10 +80,10 @@ public class SegmentReader extends Confi
private FileSystem fs;
public static class InputCompatMapper extends MapReduceBase implements
- Mapper<WritableComparable, Writable, Text, NutchWritable> {
+ Mapper<WritableComparable<?>, Writable, Text, NutchWritable> {
private Text newKey = new Text();
- public void map(WritableComparable key, Writable value,
+ public void map(WritableComparable<?> key, Writable value,
OutputCollector<Text, NutchWritable> collector, Reporter reporter) throws IOException {
// convert on the fly from old formats with UTF8 keys.
// UTF8 deprecated and replaced by Text.
@@ -98,8 +98,8 @@ public class SegmentReader extends Confi
/** Implements a text output format */
public static class TextOutputFormat extends
- FileOutputFormat<WritableComparable, Writable> {
- public RecordWriter<WritableComparable, Writable> getRecordWriter(
+ FileOutputFormat<WritableComparable<?>, Writable> {
+ public RecordWriter<WritableComparable<?>, Writable> getRecordWriter(
final FileSystem fs, JobConf job,
String name, final Progressable progress) throws IOException {
@@ -109,8 +109,8 @@ public class SegmentReader extends Confi
if (fs.exists(segmentDumpFile)) fs.delete(segmentDumpFile, true);
final PrintStream printStream = new PrintStream(fs.create(segmentDumpFile));
- return new RecordWriter<WritableComparable, Writable>() {
- public synchronized void write(WritableComparable key, Writable value) throws IOException {
+ return new RecordWriter<WritableComparable<?>, Writable>() {
+ public synchronized void write(WritableComparable<?> key, Writable value) throws IOException {
printStream.println(value);
}
@@ -379,8 +379,8 @@ public class SegmentReader extends Confi
private List<Writable> getMapRecords(Path dir, Text key) throws Exception {
MapFile.Reader[] readers = MapFileOutputFormat.getReaders(fs, dir, getConf());
ArrayList<Writable> res = new ArrayList<Writable>();
- Class keyClass = readers[0].getKeyClass();
- Class valueClass = readers[0].getValueClass();
+ Class<?> keyClass = readers[0].getKeyClass();
+ Class<?> valueClass = readers[0].getValueClass();
if (!keyClass.getName().equals("org.apache.hadoop.io.Text"))
throw new IOException("Incompatible key (" + keyClass.getName() + ")");
Writable value = (Writable)valueClass.newInstance();
@@ -403,8 +403,8 @@ public class SegmentReader extends Confi
private List<Writable> getSeqRecords(Path dir, Text key) throws Exception {
SequenceFile.Reader[] readers = SequenceFileOutputFormat.getReaders(getConf(), dir);
ArrayList<Writable> res = new ArrayList<Writable>();
- Class keyClass = readers[0].getKeyClass();
- Class valueClass = readers[0].getValueClass();
+ Class<?> keyClass = readers[0].getKeyClass();
+ Class<?> valueClass = readers[0].getValueClass();
if (!keyClass.getName().equals("org.apache.hadoop.io.Text"))
throw new IOException("Incompatible key (" + keyClass.getName() + ")");
Writable aKey = (Writable)keyClass.newInstance();
Modified: nutch/trunk/src/java/org/apache/nutch/tools/FreeGenerator.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/tools/FreeGenerator.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/tools/FreeGenerator.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/tools/FreeGenerator.java Tue May 21 01:19:26 2013
@@ -57,8 +57,6 @@ import org.apache.nutch.util.TimingUtil;
* This tool generates fetchlists (segments to be fetched) from plain text
* files containing one URL per line. It's useful when arbitrary URL-s need to
* be fetched without adding them first to the CrawlDb, or during testing.
- *
- * @author Andrzej Bialecki
*/
public class FreeGenerator extends Configured implements Tool {
private static final Logger LOG = LoggerFactory.getLogger(FreeGenerator.class);
@@ -67,7 +65,7 @@ public class FreeGenerator extends Confi
private static final String NORMALIZE_KEY = "free.generator.normalize";
public static class FG extends MapReduceBase
- implements Mapper<WritableComparable, Text, Text, Generator.SelectorEntry>,
+ implements Mapper<WritableComparable<?>, Text, Text, Generator.SelectorEntry>,
Reducer<Text, Generator.SelectorEntry, Text, CrawlDatum> {
private URLNormalizers normalizers = null;
private URLFilters filters = null;
@@ -89,7 +87,7 @@ public class FreeGenerator extends Confi
Generator.SelectorEntry entry = new Generator.SelectorEntry();
- public void map(WritableComparable key, Text value, OutputCollector<Text,
+ public void map(WritableComparable<?> key, Text value, OutputCollector<Text,
Generator.SelectorEntry> output, Reporter reporter) throws IOException {
// value is a line of text
String urlString = value.toString();
Modified: nutch/trunk/src/java/org/apache/nutch/tools/ResolveUrls.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/tools/ResolveUrls.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/tools/ResolveUrls.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/tools/ResolveUrls.java Tue May 21 01:19:26 2013
@@ -77,7 +77,7 @@ public class ResolveUrls {
// get the address by name and if no error is thrown then it
// is resolved successfully
- InetAddress ia = InetAddress.getByName(host);
+ InetAddress.getByName(host);
LOG.info("Resolved: " + host);
numResolved.incrementAndGet();
}
@@ -161,19 +161,25 @@ public class ResolveUrls {
public static void main(String[] args) {
Options options = new Options();
- Option helpOpts = OptionBuilder.withArgName("help").withDescription(
- "show this help message").create("help");
- Option urlOpts = OptionBuilder.withArgName("urls").hasArg().withDescription(
- "the urls file to check").create("urls");
- Option numThreadOpts = OptionBuilder.withArgName("numThreads").hasArgs().withDescription(
- "the number of threads to use").create("numThreads");
+ OptionBuilder.withArgName("help");
+ OptionBuilder.withDescription("show this help message");
+ Option helpOpts = OptionBuilder.create("help");
options.addOption(helpOpts);
+
+ OptionBuilder.withArgName("urls");
+ OptionBuilder.hasArg();
+ OptionBuilder.withDescription("the urls file to check");
+ Option urlOpts = OptionBuilder.create("urls");
options.addOption(urlOpts);
+
+ OptionBuilder.withArgName("numThreads");
+ OptionBuilder.hasArgs();
+ OptionBuilder.withDescription("the number of threads to use");
+ Option numThreadOpts = OptionBuilder.create("numThreads");
options.addOption(numThreadOpts);
CommandLineParser parser = new GnuParser();
try {
-
// parse out common line arguments
CommandLine line = parser.parse(options, args);
if (line.hasOption("help") || !line.hasOption("urls")) {
@@ -196,5 +202,4 @@ public class ResolveUrls {
LOG.error("ResolveUrls: " + StringUtils.stringifyException(e));
}
}
-
}
Modified: nutch/trunk/src/java/org/apache/nutch/tools/proxy/SegmentHandler.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/tools/proxy/SegmentHandler.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/tools/proxy/SegmentHandler.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/tools/proxy/SegmentHandler.java Tue May 21 01:19:26 2013
@@ -42,8 +42,6 @@ import org.apache.hadoop.util.StringUtil
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.metadata.Nutch;
-import org.apache.nutch.parse.ParseData;
-import org.apache.nutch.parse.ParseText;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.protocol.ProtocolStatus;
import org.mortbay.jetty.Request;
@@ -88,9 +86,8 @@ public class SegmentHandler extends Abst
private static class Segment implements Closeable {
- private static final Partitioner PARTITIONER = new HashPartitioner();
+ private static final Partitioner<Text,Writable> PARTITIONER = new HashPartitioner<Text,Writable>();
- private FileSystem fs;
private Path segmentDir;
private Object cLock = new Object();
@@ -102,7 +99,6 @@ public class SegmentHandler extends Abst
private Configuration conf;
public Segment(FileSystem fs, Path segmentDir, Configuration conf) throws IOException {
- this.fs = fs;
this.segmentDir = segmentDir;
this.conf = conf;
}
Modified: nutch/trunk/src/java/org/apache/nutch/util/GenericWritableConfigurable.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/util/GenericWritableConfigurable.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/util/GenericWritableConfigurable.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/util/GenericWritableConfigurable.java Tue May 21 01:19:26 2013
@@ -41,7 +41,7 @@ public abstract class GenericWritableCon
@Override
public void readFields(DataInput in) throws IOException {
byte type = in.readByte();
- Class clazz = getTypes()[type];
+ Class<?> clazz = getTypes()[type];
try {
set((Writable) clazz.newInstance());
} catch (Exception e) {
Modified: nutch/trunk/src/java/org/apache/nutch/util/PrefixStringMatcher.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/util/PrefixStringMatcher.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/util/PrefixStringMatcher.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/util/PrefixStringMatcher.java Tue May 21 01:19:26 2013
@@ -45,11 +45,11 @@ public class PrefixStringMatcher extends
* @throws ClassCastException if any <code>Object</code>s in the
* collection are not <code>String</code>s
*/
- public PrefixStringMatcher(Collection prefixes) {
+ public PrefixStringMatcher(Collection<String> prefixes) {
super();
- Iterator iter= prefixes.iterator();
+ Iterator<String> iter= prefixes.iterator();
while (iter.hasNext())
- addPatternForward((String)iter.next());
+ addPatternForward(iter.next());
}
/**
Modified: nutch/trunk/src/java/org/apache/nutch/util/SuffixStringMatcher.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/util/SuffixStringMatcher.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/util/SuffixStringMatcher.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/util/SuffixStringMatcher.java Tue May 21 01:19:26 2013
@@ -41,11 +41,11 @@ public class SuffixStringMatcher extends
* <code>String</code>s with any suffix in the supplied
* <code>Collection</code>
*/
- public SuffixStringMatcher(Collection suffixes) {
+ public SuffixStringMatcher(Collection<String> suffixes) {
super();
- Iterator iter= suffixes.iterator();
+ Iterator<String> iter= suffixes.iterator();
while (iter.hasNext())
- addPatternBackward((String)iter.next());
+ addPatternBackward(iter.next());
}
/**
Modified: nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java (original)
+++ nutch/trunk/src/plugin/creativecommons/src/java/org/creativecommons/nutch/CCParseFilter.java Tue May 21 01:19:26 2013
@@ -22,7 +22,6 @@ import org.apache.nutch.parse.*;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.metadata.Metadata;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.Text;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -217,11 +216,6 @@ public class CCParseFilter implements Ht
if (!CC_NS.equals(predicateElement.getNamespaceURI())) {
continue;
}
- String predicate = predicateElement.getLocalName();
-
- // object is rdf:resource from cc:xxx predicates
- String object =
- predicateElement.getAttributeNodeNS(RDF_NS, "resource").getValue();
// add object and predicate to metadata
// metadata.put(object, predicate);
@@ -234,22 +228,19 @@ public class CCParseFilter implements Ht
// get cc:Work nodes from rdf:RDF
NodeList works = rdf.getElementsByTagNameNS(CC_NS, "Work");
for (int i = 0; i < works.getLength(); i++) {
- Element l = (Element)works.item(i);
-
// get dc:type nodes from cc:Work
NodeList types = rdf.getElementsByTagNameNS(DC_NS, "type");
+
for (int j = 0; j < types.getLength(); j++) {
Element type = (Element)types.item(j);
- String workUri =
- type.getAttributeNodeNS(RDF_NS, "resource").getValue();
- this.workType = (String)WORK_TYPE_NAMES.get(workUri);
- break;
+ String workUri = type.getAttributeNodeNS(RDF_NS, "resource").getValue();
+ this.workType = WORK_TYPE_NAMES.get(workUri);
}
}
}
}
- private static final HashMap WORK_TYPE_NAMES = new HashMap();
+ private static final HashMap<String, String> WORK_TYPE_NAMES = new HashMap<String, String>();
static {
WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/MovingImage", "video");
WORK_TYPE_NAMES.put("http://purl.org/dc/dcmitype/StillImage", "image");
Modified: nutch/trunk/src/plugin/lib-regex-filter/src/test/org/apache/nutch/urlfilter/api/RegexURLFilterBaseTest.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/lib-regex-filter/src/test/org/apache/nutch/urlfilter/api/RegexURLFilterBaseTest.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/plugin/lib-regex-filter/src/test/org/apache/nutch/urlfilter/api/RegexURLFilterBaseTest.java (original)
+++ nutch/trunk/src/plugin/lib-regex-filter/src/test/org/apache/nutch/urlfilter/api/RegexURLFilterBaseTest.java Tue May 21 01:19:26 2013
@@ -109,7 +109,7 @@ public abstract class RegexURLFilterBase
private static FilteredURL[] readURLFile(Reader reader) throws IOException {
BufferedReader in = new BufferedReader(reader);
- List list = new ArrayList();
+ List<FilteredURL> list = new ArrayList<FilteredURL>();
String line;
while((line=in.readLine()) != null) {
if (line.length() != 0) {
Modified: nutch/trunk/src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagParser.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagParser.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagParser.java (original)
+++ nutch/trunk/src/plugin/microformats-reltag/src/java/org/apache/nutch/microformats/reltag/RelTagParser.java Tue May 21 01:19:26 2013
@@ -42,15 +42,12 @@ import org.apache.nutch.util.StringUtil;
// Hadoop imports
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.Text;
-
/**
* Adds microformat rel-tags of document if found.
*
* @see <a href="http://www.microformats.org/wiki/rel-tag">
* http://www.microformats.org/wiki/rel-tag</a>
- * @author Jérôme Charron
*/
public class RelTagParser implements HtmlParseFilter {
@@ -58,10 +55,8 @@ public class RelTagParser implements Htm
public final static String REL_TAG = "Rel-Tag";
-
private Configuration conf = null;
-
/**
* Scan the HTML document looking at possible rel-tags
*/
@@ -72,25 +67,25 @@ public class RelTagParser implements Htm
Parse parse = parseResult.get(content.getUrl());
// Trying to find the document's rel-tags
Parser parser = new Parser(doc);
- Set tags = parser.getRelTags();
- Iterator iter = tags.iterator();
+ Set<?> tags = parser.getRelTags();
+ Iterator<?> iter = tags.iterator();
Metadata metadata = parse.getData().getParseMeta();
- while (iter.hasNext()) {
+ while (iter.hasNext())
metadata.add(REL_TAG, (String) iter.next());
- }
+
return parseResult;
}
private static class Parser {
- Set tags = null;
+ Set<String> tags = null;
Parser(Node node) {
- tags = new TreeSet();
+ tags = new TreeSet<String>();
parse(node);
}
- Set getRelTags() {
+ Set<String> getRelTags() {
return tags;
}
@@ -120,9 +115,8 @@ public class RelTagParser implements Htm
// Recurse
NodeList children = node.getChildNodes();
- for (int i=0; children != null && i<children.getLength(); i++) {
+ for (int i=0; children != null && i<children.getLength(); i++)
parse(children.item(i));
- }
}
private final static String parseTag(String url) {
@@ -140,11 +134,6 @@ public class RelTagParser implements Htm
}
-
- /* ----------------------------- *
- * <implementation:Configurable> *
- * ----------------------------- */
-
public void setConf(Configuration conf) {
this.conf = conf;
}
@@ -152,9 +141,4 @@ public class RelTagParser implements Htm
public Configuration getConf() {
return this.conf;
}
-
- /* ------------------------------ *
- * </implementation:Configurable> *
- * ------------------------------ */
-
}
Modified: nutch/trunk/src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/ExtParser.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/ExtParser.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/ExtParser.java (original)
+++ nutch/trunk/src/plugin/parse-ext/src/java/org/apache/nutch/parse/ext/ExtParser.java Tue May 21 01:19:26 2013
@@ -21,14 +21,12 @@ import org.apache.nutch.protocol.Content
import org.apache.nutch.parse.ParseResult;
import org.apache.nutch.parse.ParseStatus;
import org.apache.nutch.parse.Parser;
-import org.apache.nutch.parse.Parse;
import org.apache.nutch.parse.ParseData;
import org.apache.nutch.parse.ParseImpl;
import org.apache.nutch.parse.Outlink;
import org.apache.nutch.parse.OutlinkExtractor;
import org.apache.nutch.util.CommandRunner;
-import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.net.protocols.Response;
import org.apache.hadoop.conf.Configuration;
@@ -59,12 +57,10 @@ public class ExtParser implements Parser
static final int TIMEOUT_DEFAULT = 30; // in seconds
// handy map from String contentType to String[] {command, timeoutString, encoding}
- Hashtable TYPE_PARAMS_MAP = new Hashtable();
+ Hashtable<String, String[]> TYPE_PARAMS_MAP = new Hashtable<String, String[]>();
private Configuration conf;
- private boolean loaded = false;
-
public ExtParser () { }
public ParseResult getParse(Content content) {
Modified: nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMBuilder.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMBuilder.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMBuilder.java (original)
+++ nutch/trunk/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/DOMBuilder.java Tue May 21 01:19:26 2013
@@ -58,7 +58,7 @@ public class DOMBuilder
public DocumentFragment m_docFrag = null;
/** Vector of element nodes */
- protected Stack m_elemStack = new Stack();
+ protected Stack<Element> m_elemStack = new Stack<Element>();
/**
* DOMBuilder instance constructor... it will add the DOM nodes
Modified: nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java (original)
+++ nutch/trunk/src/plugin/parse-js/src/java/org/apache/nutch/parse/js/JSParseFilter.java Tue May 21 01:19:26 2013
@@ -42,7 +42,6 @@ import org.apache.nutch.parse.Parser;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.Text;
import org.apache.oro.text.regex.MatchResult;
import org.apache.oro.text.regex.Pattern;
import org.apache.oro.text.regex.PatternCompiler;
@@ -60,9 +59,6 @@ import org.w3c.dom.NodeList;
* This class is a heuristic link extractor for JavaScript files and
* code snippets. The general idea of a two-pass regex matching comes from
* Heritrix. Parts of the code come from OutlinkExtractor.java
- * by Stephan Strittmatter.
- *
- * @author Andrzej Bialecki <ab@getopt.org>
*/
public class JSParseFilter implements HtmlParseFilter, Parser {
public static final Logger LOG = LoggerFactory.getLogger(JSParseFilter.class);
@@ -77,12 +73,12 @@ public class JSParseFilter implements Ht
Parse parse = parseResult.get(content.getUrl());
String url = content.getBaseUrl();
- ArrayList outlinks = new ArrayList();
+ ArrayList<Outlink> outlinks = new ArrayList<Outlink>();
walk(doc, parse, metaTags, url, outlinks);
if (outlinks.size() > 0) {
Outlink[] old = parse.getData().getOutlinks();
String title = parse.getData().getTitle();
- List list = Arrays.asList(old);
+ List<Outlink> list = Arrays.asList(old);
outlinks.addAll(list);
ParseStatus status = parse.getData().getStatus();
String text = parse.getText();
@@ -97,14 +93,14 @@ public class JSParseFilter implements Ht
return parseResult;
}
- private void walk(Node n, Parse parse, HTMLMetaTags metaTags, String base, List outlinks) {
+ private void walk(Node n, Parse parse, HTMLMetaTags metaTags, String base, List<Outlink> outlinks) {
if (n instanceof Element) {
String name = n.getNodeName();
if (name.equalsIgnoreCase("script")) {
- String lang = null;
+ /* String lang = null;
Node lNode = n.getAttributes().getNamedItem("language");
if (lNode == null) lang = "javascript";
- else lang = lNode.getNodeValue();
+ else lang = lNode.getNodeValue(); */
StringBuffer script = new StringBuffer();
NodeList nn = n.getChildNodes();
if (nn.getLength() > 0) {
@@ -183,7 +179,7 @@ public class JSParseFilter implements Ht
*/
private Outlink[] getJSLinks(String plainText, String anchor, String base) {
- final List outlinks = new ArrayList();
+ final List<Outlink> outlinks = new ArrayList<Outlink>();
URL baseURL = null;
try {
@@ -265,7 +261,10 @@ public class JSParseFilter implements Ht
BufferedReader br = new BufferedReader(new InputStreamReader(in, "UTF-8"));
StringBuffer sb = new StringBuffer();
String line = null;
- while ((line = br.readLine()) != null) sb.append(line + "\n");
+ while ((line = br.readLine()) != null)
+ sb.append(line + "\n");
+ br.close();
+
JSParseFilter parseFilter = new JSParseFilter();
parseFilter.setConf(NutchConfiguration.create());
Outlink[] links = parseFilter.getJSLinks(sb.toString(), "", args[1]);