You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by te...@apache.org on 2013/05/21 03:19:27 UTC
svn commit: r1484634 [2/2] - in /nutch/trunk: ./
src/java/org/apache/nutch/crawl/ src/java/org/apache/nutch/fetcher/
src/java/org/apache/nutch/indexer/ src/java/org/apache/nutch/metadata/
src/java/org/apache/nutch/net/ src/java/org/apache/nutch/parse/ ...
Modified: nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java (original)
+++ nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java Tue May 21 01:19:26 2013
@@ -42,8 +42,6 @@ import com.anotherbigidea.io.InStream;
/**
* Parser for Flash SWF files. Loosely based on the sample in JavaSWF
* distribution.
- *
- * @author Andrzej Bialecki
*/
public class SWFParser implements Parser {
public static final Logger LOG = LoggerFactory.getLogger("org.apache.nutch.parse.swf");
@@ -63,7 +61,7 @@ public class SWFParser implements Parser
public ParseResult getParse(Content content) {
String text = null;
- Vector outlinks = new Vector();
+ Vector<Outlink> outlinks = new Vector<Outlink>();
try {
@@ -120,6 +118,7 @@ public class SWFParser implements Parser
byte[] buf = new byte[in.available()];
in.read(buf);
+ in.close();
SWFParser parser = new SWFParser();
ParseResult parseResult = parser.getParse(new Content("file:" + args[0], "file:" + args[0],
buf, "application/x-shockwave-flash",
@@ -153,13 +152,13 @@ class ExtractText extends SWFTagTypesImp
* character codes for the corresponding font glyphs (An empty array denotes a
* System Font).
*/
- protected HashMap fontCodes = new HashMap();
+ protected HashMap<Integer, int[]> fontCodes = new HashMap<Integer, int[]>();
- public ArrayList strings = new ArrayList();
+ public ArrayList<String> strings = new ArrayList<String>();
- public HashSet actionStrings = new HashSet();
+ public HashSet<String> actionStrings = new HashSet<String>();
- public ArrayList urls = new ArrayList();
+ public ArrayList<String> urls = new ArrayList<String>();
public ExtractText() {
super(null);
@@ -167,7 +166,7 @@ class ExtractText extends SWFTagTypesImp
public String getText() {
StringBuffer res = new StringBuffer();
- Iterator it = strings.iterator();
+ Iterator<String> it = strings.iterator();
while (it.hasNext()) {
if (res.length() > 0) res.append(' ');
res.append(it.next());
@@ -189,7 +188,7 @@ class ExtractText extends SWFTagTypesImp
public String[] getUrls() {
String[] res = new String[urls.size()];
int i = 0;
- Iterator it = urls.iterator();
+ Iterator<String> it = urls.iterator();
while (it.hasNext()) {
res[i] = (String) it.next();
i++;
@@ -350,26 +349,23 @@ class ExtractText extends SWFTagTypesImp
* ActionScript parser. This parser tries to extract free text embedded inside
* the script, but without polluting it too much with names of variables,
* methods, etc. Not ideal, but it works.
- *
- * @author Andrzej Bialecki
*/
class NutchSWFActions extends SWFActionBlockImpl implements SWFActions {
- private HashSet strings = null;
+ private HashSet<String> strings = null;
- private ArrayList urls = null;
+ private ArrayList<String> urls = null;
String[] dict = null;
- Stack stack = null;
+ Stack<Object> stack = null;
- public NutchSWFActions(HashSet strings, ArrayList urls) {
+ public NutchSWFActions(HashSet<String> strings, ArrayList<String> urls) {
this.strings = strings;
this.urls = urls;
stack = new SmallStack(100, strings);
}
public void lookupTable(String[] values) throws IOException {
- // System.out.println("-lookupTable: " + values.length);
for (int i = 0; i < values.length; i++) {
if (!strings.contains(values[i])) strings.add(values[i]);
}
@@ -378,7 +374,6 @@ class NutchSWFActions extends SWFActionB
}
public void defineLocal() throws IOException {
- // System.out.println("-defineLocal");
stack.pop();
super.defineLocal();
}
@@ -398,69 +393,58 @@ class NutchSWFActions extends SWFActionB
}
public SWFActionBlock.TryCatchFinally _try(String var) throws IOException {
- // System.out.println("_try: var=" + var);
// stack.push(var);
strings.remove(var);
return super._try(var);
}
public void comment(String var) throws IOException {
- // System.out.println("-comment: var=" + var);
// stack.push(var);
strings.remove(var);
super.comment(var);
}
public void goToFrame(String var) throws IOException {
- // System.out.println("-goToFrame: var=" + var);
stack.push(var);
strings.remove(var);
super.gotoFrame(var);
}
public void ifJump(String var) throws IOException {
- // System.out.println("-ifJump: var=" + var);
strings.remove(var);
super.ifJump(var);
}
public void jump(String var) throws IOException {
- // System.out.println("-jump: var=" + var);
strings.remove(var);
super.jump(var);
}
public void jumpLabel(String var) throws IOException {
- // System.out.println("-jumpLabel: var=" + var);
strings.remove(var);
super.jumpLabel(var);
}
public void lookup(int var) throws IOException {
- // System.out.println("-lookup: var=" + var);
if (dict != null && var >= 0 && var < dict.length) {
- // System.out.println(" push " + dict[var]);
stack.push(dict[var]);
}
super.lookup(var);
}
public void push(String var) throws IOException {
- // System.out.println("-push: var=" + var);
stack.push(var);
strings.remove(var);
super.push(var);
}
public void setTarget(String var) throws IOException {
- // System.out.println("-setTarget: var=" + var);
stack.push(var);
strings.remove(var);
super.setTarget(var);
}
public SWFActionBlock startFunction(String var, String[] params) throws IOException {
- // System.out.println("-startFunction1: var=" + var);
stack.push(var);
strings.remove(var);
if (params != null) {
@@ -472,7 +456,6 @@ class NutchSWFActions extends SWFActionB
}
public SWFActionBlock startFunction2(String var, int arg1, int arg2, String[] params, int[] arg3) throws IOException {
- // System.out.println("-startFunction2: var=" + var);
stack.push(var);
strings.remove(var);
if (params != null) {
@@ -484,74 +467,61 @@ class NutchSWFActions extends SWFActionB
}
public void waitForFrame(int num, String var) throws IOException {
- // System.out.println("-waitForFrame: var=" + var);
stack.push(var);
strings.remove(var);
super.waitForFrame(num, var);
}
public void waitForFrame(String var) throws IOException {
- // System.out.println("-waitForFrame: var=" + var);
stack.push(var);
strings.remove(var);
super.waitForFrame(var);
}
public void done() throws IOException {
- // System.out.println("-done");
while (stack.size() > 0) {
strings.remove(stack.pop());
}
}
public SWFActionBlock start(int arg0, int arg1) throws IOException {
- // System.out.println("-start");
return this;
}
public SWFActionBlock start(int arg0) throws IOException {
- // System.out.println("-start");
return this;
}
public void add() throws IOException {
- // System.out.println("-add");
super.add();
}
public void asciiToChar() throws IOException {
- // System.out.println("-asciitochar");
super.asciiToChar();
}
public void asciiToCharMB() throws IOException {
- // System.out.println("-asciitocharMB");
super.asciiToCharMB();
}
public void push(int var) throws IOException {
- // System.out.println("-push(int)");
if (dict != null && var >= 0 && var < dict.length) {
- // System.out.println(" push " + dict[var]);
stack.push(dict[var]);
}
super.push(var);
}
public void callFunction() throws IOException {
- // System.out.println("-callFunction");
strings.remove(stack.pop());
super.callFunction();
}
public void callMethod() throws IOException {
- // System.out.println("-callMethod");
strings.remove(stack.pop());
super.callMethod();
}
public void getMember() throws IOException {
- // System.out.println("-getMember");
// 0: name
String val = (String) stack.pop();
strings.remove(val);
@@ -560,116 +530,97 @@ class NutchSWFActions extends SWFActionB
public void setMember() throws IOException {
// 0: value -1: name
- String val = (String) stack.pop();
+ stack.pop(); // value
String name = (String) stack.pop();
- // System.out.println("-setMember: name=" + name + ", val=" + val);
strings.remove(name);
super.setMember();
}
public void setProperty() throws IOException {
- // System.out.println("-setProperty");
super.setProperty();
}
public void setVariable() throws IOException {
- // System.out.println("-setVariable");
super.setVariable();
}
public void call() throws IOException {
- // System.out.println("-call");
strings.remove(stack.pop());
super.call();
}
public void setTarget() throws IOException {
- // System.out.println("-setTarget");
strings.remove(stack.pop());
super.setTarget();
}
public void pop() throws IOException {
- // System.out.println("-pop");
strings.remove(stack.pop());
super.pop();
}
public void push(boolean arg0) throws IOException {
- // System.out.println("-push(b)");
stack.push("" + arg0);
super.push(arg0);
}
public void push(double arg0) throws IOException {
- // System.out.println("-push(d)");
stack.push("" + arg0);
super.push(arg0);
}
public void push(float arg0) throws IOException {
- // System.out.println("-push(f)");
stack.push("" + arg0);
super.push(arg0);
}
public void pushNull() throws IOException {
- // System.out.println("-push(null)");
stack.push("");
super.pushNull();
}
public void pushRegister(int arg0) throws IOException {
- // System.out.println("-push(reg)");
stack.push("" + arg0);
super.pushRegister(arg0);
}
public void pushUndefined() throws IOException {
- // System.out.println("-push(undef)");
stack.push("???");
super.pushUndefined();
}
public void getProperty() throws IOException {
- // System.out.println("-getProperty");
stack.pop();
super.getProperty();
}
public void getVariable() throws IOException {
- // System.out.println("-getVariable");
strings.remove(stack.pop());
super.getVariable();
}
public void gotoFrame(boolean arg0) throws IOException {
- // System.out.println("-gotoFrame(b)");
stack.push("" + arg0);
super.gotoFrame(arg0);
}
public void gotoFrame(int arg0) throws IOException {
- // System.out.println("-gotoFrame(int)");
stack.push("" + arg0);
super.gotoFrame(arg0);
}
public void gotoFrame(String arg0) throws IOException {
- // System.out.println("-gotoFrame(string)");
stack.push("" + arg0);
strings.remove(arg0);
super.gotoFrame(arg0);
}
public void newObject() throws IOException {
- // System.out.println("-newObject");
stack.pop();
super.newObject();
}
public SWFActionBlock startWith() throws IOException {
- // System.out.println("-startWith");
return this;
}
@@ -678,13 +629,15 @@ class NutchSWFActions extends SWFActionB
/*
* Small bottom-less stack.
*/
-class SmallStack extends Stack {
+class SmallStack extends Stack<Object> {
+
+ private static final long serialVersionUID = 1L;
private int maxSize;
- private HashSet strings = null;
+ private HashSet<String> strings = null;
- public SmallStack(int maxSize, HashSet strings) {
+ public SmallStack(int maxSize, HashSet<String> strings) {
this.maxSize = maxSize;
this.strings = strings;
}
Modified: nutch/trunk/src/plugin/parse-swf/src/test/org/apache/nutch/parse/swf/TestSWFParser.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-swf/src/test/org/apache/nutch/parse/swf/TestSWFParser.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/plugin/parse-swf/src/test/org/apache/nutch/parse/swf/TestSWFParser.java (original)
+++ nutch/trunk/src/plugin/parse-swf/src/test/org/apache/nutch/parse/swf/TestSWFParser.java Tue May 21 01:19:26 2013
@@ -32,13 +32,12 @@ import org.apache.nutch.parse.Parse;
import org.apache.nutch.parse.ParseException;
import org.apache.hadoop.conf.Configuration;
import org.apache.nutch.util.NutchConfiguration;
+import org.mortbay.log.Log;
import junit.framework.TestCase;
/**
* Unit tests for SWFParser.
- *
- * @author Andrzej Bialecki
*/
public class TestSWFParser extends TestCase {
@@ -48,7 +47,6 @@ public class TestSWFParser extends TestC
private String[] sampleFiles = new String[]{"test1.swf", "test2.swf", "test3.swf"};
private String[] sampleTexts = new String[]{"test1.txt", "test2.txt", "test3.txt"};
- private String[] texts = new String[sampleTexts.length];
public TestSWFParser(String name) {
super(name);
@@ -94,5 +92,4 @@ public class TestSWFParser extends TestC
assertTrue(sampleTexts[i].equals(text));
}
}
-
}
Modified: nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/DOMBuilder.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/DOMBuilder.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/DOMBuilder.java (original)
+++ nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/DOMBuilder.java Tue May 21 01:19:26 2013
@@ -58,7 +58,7 @@ class DOMBuilder
public DocumentFragment m_docFrag = null;
/** Vector of element nodes */
- protected Stack m_elemStack = new Stack();
+ protected Stack<Element> m_elemStack = new Stack<Element>();
/**
* DOMBuilder instance constructor... it will add the DOM nodes
Modified: nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/TikaParser.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/TikaParser.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/TikaParser.java (original)
+++ nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/TikaParser.java Tue May 21 01:19:26 2013
@@ -59,7 +59,8 @@ public class TikaParser implements org.a
private HtmlParseFilters htmlParseFilters;
private String cachingPolicy;
- public ParseResult getParse(Content content) {
+ @SuppressWarnings("deprecation")
+ public ParseResult getParse(Content content) {
String mimeType = content.getContentType();
URL base;
Modified: nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java (original)
+++ nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java Tue May 21 01:19:26 2013
@@ -18,15 +18,12 @@
package org.apache.nutch.parse.zip;
import java.io.ByteArrayInputStream;
-import java.io.InputStream;
-import java.util.Properties;
import java.util.ArrayList;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import org.apache.nutch.metadata.Metadata;
import org.apache.nutch.net.protocols.Response;
import org.apache.nutch.parse.Outlink;
import org.apache.nutch.parse.ParseData;
@@ -40,8 +37,6 @@ import org.apache.hadoop.conf.Configurat
/**
* ZipParser class based on MSPowerPointParser class by Stephan Strittmatter.
* Nutch parse plugin for zip files - Content Type : application/zip
- *
- * @author Rohit Kulkarni & Ashish Vaidya
*/
public class ZipParser implements Parser {
@@ -57,17 +52,13 @@ public class ZipParser implements Parser
String resultText = null;
String resultTitle = null;
Outlink[] outlinks = null;
- List outLinksList = new ArrayList();
- Properties properties = null;
+ List<Outlink> outLinksList = new ArrayList<Outlink>();
try {
final String contentLen = content.getMetadata().get(Response.CONTENT_LENGTH);
final int len = Integer.parseInt(contentLen);
if (LOG.isDebugEnabled()) { LOG.debug("ziplen: " + len); }
final byte[] contentInBytes = content.getContent();
- final ByteArrayInputStream bainput = new ByteArrayInputStream(
- contentInBytes);
- final InputStream input = bainput;
if (contentLen != null && contentInBytes.length != len) {
return new ParseStatus(ParseStatus.FAILED,
Modified: nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java (original)
+++ nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java Tue May 21 01:19:26 2013
@@ -77,9 +77,9 @@ public class Client extends FTP
private int __dataTimeout;
private int __passivePort;
private String __passiveHost;
- private int __fileType, __fileFormat;
+// private int __fileType, __fileFormat;
private boolean __remoteVerificationEnabled;
- private FTPFileEntryParser __entryParser;
+// private FTPFileEntryParser __entryParser;
private String __systemName;
// constructor
@@ -95,10 +95,10 @@ public class Client extends FTP
{
__passiveHost = null;
__passivePort = -1;
- __fileType = FTP.ASCII_FILE_TYPE;
- __fileFormat = FTP.NON_PRINT_TEXT_FORMAT;
__systemName = null;
- __entryParser = null;
+// __fileType = FTP.ASCII_FILE_TYPE;
+// __fileFormat = FTP.NON_PRINT_TEXT_FORMAT;
+// __entryParser = null;
}
// parse reply for pass()
@@ -315,7 +315,7 @@ public class Client extends FTP
}
// retrieve list reply for path
- public void retrieveList(String path, List entries, int limit,
+ public void retrieveList(String path, List<FTPFile> entries, int limit,
FTPFileEntryParser parser)
throws IOException,
FtpExceptionCanNotHaveDataConnection,
@@ -331,7 +331,7 @@ public class Client extends FTP
new BufferedReader(new InputStreamReader(socket.getInputStream()));
// force-close data channel socket, when download limit is reached
- boolean mandatory_close = false;
+// boolean mandatory_close = false;
//List entries = new LinkedList();
int count = 0;
@@ -348,7 +348,7 @@ public class Client extends FTP
// impose download limit if limit >= 0, otherwise no limit
// here, cut off is up to the line when total bytes is just over limit
if (limit >= 0 && count > limit) {
- mandatory_close = true;
+// mandatory_close = true;
break;
}
line = parser.readNextEntry(reader);
@@ -403,7 +403,7 @@ public class Client extends FTP
// fixme, should we instruct server here for binary file type?
// force-close data channel socket
- boolean mandatory_close = false;
+ // boolean mandatory_close = false;
int len; int count = 0;
byte[] buf =
@@ -414,7 +414,7 @@ public class Client extends FTP
// here, cut off is exactly of limit bytes
if (limit >= 0 && count > limit) {
os.write(buf,0,len-(count-limit));
- mandatory_close = true;
+ // mandatory_close = true;
break;
}
os.write(buf,0,len);
@@ -502,8 +502,8 @@ public class Client extends FTP
{
if (FTPReply.isPositiveCompletion(type(fileType)))
{
- __fileType = fileType;
- __fileFormat = FTP.NON_PRINT_TEXT_FORMAT;
+/* __fileType = fileType;
+ __fileFormat = FTP.NON_PRINT_TEXT_FORMAT;*/
return true;
}
return false;
Modified: nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java (original)
+++ nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java Tue May 21 01:19:26 2013
@@ -55,7 +55,7 @@ public class Ftp implements Protocol {
public static final Logger LOG = LoggerFactory.getLogger(Ftp.class);
- static final int BUFFER_SIZE = 16384; // 16*1024 = 16384
+ private static final int BUFFER_SIZE = 16384; // 16*1024 = 16384
static final int MAX_REDIRECTS = 5;
@@ -257,5 +257,9 @@ public class Ftp implements Protocol {
public BaseRobotRules getRobotRules(Text url, CrawlDatum datum) {
return RobotRulesParser.EMPTY_RULES;
}
+
+ public int getBufferSize() {
+ return BUFFER_SIZE;
+ }
}
Modified: nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java (original)
+++ nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java Tue May 21 01:19:26 2013
@@ -17,11 +17,9 @@
package org.apache.nutch.protocol.ftp;
-
import org.apache.commons.net.ftp.FTP;
import org.apache.commons.net.ftp.FTPFile;
import org.apache.commons.net.ftp.FTPReply;
-
import org.apache.commons.net.ftp.parser.DefaultFTPFileEntryParserFactory;
import org.apache.commons.net.ftp.parser.ParserInitializationException;
@@ -42,8 +40,7 @@ import java.util.LinkedList;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
-
-/************************************
+/**
* FtpResponse.java mimics ftp replies as http response.
* It tries its best to follow http's way for headers, response codes
* as well as exceptions.
@@ -53,9 +50,7 @@ import java.io.IOException;
* and some important commons-net exceptions passed by Client.java
* must have been properly dealt with. They'd better not be leaked
* to the caller of this class.
- *
- * @author John Xing
- ***********************************/
+ */
public class FtpResponse {
private String orig;
@@ -146,7 +141,7 @@ public class FtpResponse {
// follow ftp talk?
if (ftp.followTalk)
ftp.client.addProtocolCommandListener(
- new PrintCommandListener(ftp.LOG));
+ new PrintCommandListener(Ftp.LOG));
}
// quit from previous site if at a different site now
@@ -284,8 +279,8 @@ public class FtpResponse {
}
} catch (Exception e) {
- if (ftp.LOG.isWarnEnabled()) {
- ftp.LOG.warn("Error: ", e);
+ if (Ftp.LOG.isWarnEnabled()) {
+ Ftp.LOG.warn("Error: ", e);
}
// for any un-foreseen exception (run time exception or not),
// do ultimate clean and leave ftp.client for garbage collection
@@ -312,11 +307,11 @@ public class FtpResponse {
throws IOException {
ByteArrayOutputStream os = null;
- List list = null;
+ List<FTPFile> list = null;
try {
// first get its possible attributes
- list = new LinkedList();
+ list = new LinkedList<FTPFile>();
ftp.client.retrieveList(path, list, ftp.maxContentLength, ftp.parser);
FTPFile ftpFile = (FTPFile) list.get(0);
@@ -329,7 +324,7 @@ public class FtpResponse {
code = 304;
return;
}
- os = new ByteArrayOutputStream(ftp.BUFFER_SIZE);
+ os = new ByteArrayOutputStream(ftp.getBufferSize());
ftp.client.retrieveFile(path, os, ftp.maxContentLength);
this.content = os.toByteArray();
@@ -414,7 +409,7 @@ public class FtpResponse {
// get ftp dir list as http response
private void getDirAsHttpResponse(String path, long lastModified)
throws IOException {
- List list = new LinkedList();
+ List<FTPFile> list = new LinkedList<FTPFile>();
try {
@@ -482,7 +477,7 @@ public class FtpResponse {
}
// generate html page from ftp dir list
- private byte[] list2html(List list, String path, boolean includeDotDot) {
+ private byte[] list2html(List<FTPFile> list, String path, boolean includeDotDot) {
//StringBuffer x = new StringBuffer("<!doctype html public \"-//ietf//dtd html//en\"><html><head>");
StringBuffer x = new StringBuffer("<html><head>");
Modified: nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java (original)
+++ nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java Tue May 21 01:19:26 2013
@@ -42,6 +42,8 @@ import org.apache.commons.httpclient.NTC
import org.apache.commons.httpclient.auth.AuthScope;
import org.apache.commons.httpclient.params.HttpConnectionManagerParams;
import org.apache.commons.httpclient.protocol.Protocol;
+import org.apache.commons.httpclient.protocol.ProtocolSocketFactory;
+import org.apache.commons.httpclient.protocol.SSLProtocolSocketFactory;
// Nutch imports
import org.apache.nutch.crawl.CrawlDatum;
@@ -158,8 +160,8 @@ public class Http extends HttpBase {
private void configureClient() {
// Set up an HTTPS socket factory that accepts self-signed certs.
- Protocol https = new Protocol("https",
- new DummySSLProtocolSocketFactory(), 443);
+ ProtocolSocketFactory factory = new SSLProtocolSocketFactory();
+ Protocol https = new Protocol("https", factory, 443);
Protocol.registerProtocol("https", https);
HttpConnectionManagerParams params = connectionManager.getParams();
@@ -174,7 +176,7 @@ public class Http extends HttpBase {
client.getParams().setConnectionManagerTimeout(timeout);
HostConfiguration hostConf = client.getHostConfiguration();
- ArrayList headers = new ArrayList();
+ ArrayList<Header> headers = new ArrayList<Header>();
// Set the User Agent in the header
headers.add(new Header("User-Agent", userAgent));
// prefer English
@@ -199,7 +201,7 @@ public class Http extends HttpBase {
NTCredentials proxyCredentials = new NTCredentials(
this.proxyUsername, this.proxyPassword,
- this.agentHost, this.proxyRealm);
+ Http.agentHost, this.proxyRealm);
client.getState().setProxyCredentials(
proxyAuthScope, proxyCredentials);
Modified: nutch/trunk/src/plugin/protocol-httpclient/src/test/org/apache/nutch/protocol/httpclient/TestProtocolHttpClient.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-httpclient/src/test/org/apache/nutch/protocol/httpclient/TestProtocolHttpClient.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/plugin/protocol-httpclient/src/test/org/apache/nutch/protocol/httpclient/TestProtocolHttpClient.java (original)
+++ nutch/trunk/src/plugin/protocol-httpclient/src/test/org/apache/nutch/protocol/httpclient/TestProtocolHttpClient.java Tue May 21 01:19:26 2013
@@ -23,7 +23,6 @@ import junit.framework.TestCase;
import org.mortbay.jetty.Server;
import org.mortbay.jetty.bio.SocketConnector;
import org.mortbay.jetty.handler.ContextHandler;
-import org.mortbay.jetty.handler.ResourceHandler;
import org.mortbay.jetty.servlet.ServletHandler;
import org.mortbay.jetty.servlet.SessionHandler;
import org.apache.hadoop.conf.Configuration;
@@ -32,8 +31,6 @@ import org.apache.nutch.net.protocols.Re
/**
* Test cases for protocol-httpclient.
- *
- * @author Susam Pal
*/
public class TestProtocolHttpClient extends TestCase {
Modified: nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java (original)
+++ nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java Tue May 21 01:19:26 2013
@@ -14,9 +14,6 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
-
-// $Id$
-
package org.apache.nutch.urlfilter.prefix;
import org.slf4j.Logger;
@@ -79,7 +76,7 @@ public class PrefixURLFilter implements
throws IOException {
BufferedReader in=new BufferedReader(reader);
- List urlprefixes = new ArrayList();
+ List<String> urlprefixes = new ArrayList<String>();
String line;
while((line=in.readLine())!=null) {
Modified: nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java (original)
+++ nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java Tue May 21 01:19:26 2013
@@ -181,7 +181,7 @@ public class SuffixURLFilter implements
return;
}
BufferedReader in = new BufferedReader(reader);
- List aSuffixes = new ArrayList();
+ List<String> aSuffixes = new ArrayList<String>();
boolean allow = false;
boolean ignore = false;
String line;
Modified: nutch/trunk/src/test/org/apache/nutch/crawl/TestCrawlDbMerger.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/crawl/TestCrawlDbMerger.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/test/org/apache/nutch/crawl/TestCrawlDbMerger.java (original)
+++ nutch/trunk/src/test/org/apache/nutch/crawl/TestCrawlDbMerger.java Tue May 21 01:19:26 2013
@@ -44,9 +44,9 @@ public class TestCrawlDbMerger extends T
url21
};
- TreeSet init1 = new TreeSet();
- TreeSet init2 = new TreeSet();
- HashMap expected = new HashMap();
+ TreeSet<String> init1 = new TreeSet<String>();
+ TreeSet<String> init2 = new TreeSet<String>();
+ HashMap<String, CrawlDatum> expected = new HashMap<String, CrawlDatum>();
CrawlDatum cd1, cd2, cd3;
Configuration conf;
FileSystem fs;
@@ -83,6 +83,7 @@ public class TestCrawlDbMerger extends T
fs.mkdirs(testDir);
}
+ @SuppressWarnings("deprecation")
public void tearDown() {
try {
if (fs.exists(testDir))
@@ -93,6 +94,7 @@ public class TestCrawlDbMerger extends T
} catch (Exception e) { }
}
+ @SuppressWarnings("deprecation")
public void testMerge() throws Exception {
Path crawldb1 = new Path(testDir, "crawldb1");
Path crawldb2 = new Path(testDir, "crawldb2");
@@ -105,11 +107,11 @@ public class TestCrawlDbMerger extends T
LOG.fine("* reading crawldb: " + output);
reader = new CrawlDbReader();
String crawlDb = output.toString();
- Iterator it = expected.keySet().iterator();
+ Iterator<String> it = expected.keySet().iterator();
while (it.hasNext()) {
- String url = (String)it.next();
+ String url = it.next();
LOG.fine("url=" + url);
- CrawlDatum cd = (CrawlDatum)expected.get(url);
+ CrawlDatum cd = expected.get(url);
CrawlDatum res = reader.get(crawlDb, url, conf);
LOG.fine(" -> " + res);
System.out.println("url=" + url);
@@ -123,13 +125,13 @@ public class TestCrawlDbMerger extends T
fs.delete(testDir);
}
- private void createCrawlDb(Configuration config, FileSystem fs, Path crawldb, TreeSet init, CrawlDatum cd) throws Exception {
+ private void createCrawlDb(Configuration config, FileSystem fs, Path crawldb, TreeSet<String> init, CrawlDatum cd) throws Exception {
LOG.fine("* creating crawldb: " + crawldb);
Path dir = new Path(crawldb, CrawlDb.CURRENT_NAME);
MapFile.Writer writer = new MapFile.Writer(config, fs, new Path(dir, "part-00000").toString(), Text.class, CrawlDatum.class);
- Iterator it = init.iterator();
+ Iterator<String> it = init.iterator();
while (it.hasNext()) {
- String key = (String)it.next();
+ String key = it.next();
writer.append(new Text(key), cd);
}
writer.close();
Modified: nutch/trunk/src/test/org/apache/nutch/crawl/TestLinkDbMerger.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/crawl/TestLinkDbMerger.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/test/org/apache/nutch/crawl/TestLinkDbMerger.java (original)
+++ nutch/trunk/src/test/org/apache/nutch/crawl/TestLinkDbMerger.java Tue May 21 01:19:26 2013
@@ -68,9 +68,9 @@ public class TestLinkDbMerger extends Te
String[] urls20_expected = urls11_expected;
String[] urls21_expected = urls21;
- TreeMap init1 = new TreeMap();
- TreeMap init2 = new TreeMap();
- HashMap expected = new HashMap();
+ TreeMap<String, String[]> init1 = new TreeMap<String, String[]>();
+ TreeMap<String, String[]> init2 = new TreeMap<String, String[]>();
+ HashMap<String, String[]> expected = new HashMap<String, String[]>();
Configuration conf;
Path testDir;
FileSystem fs;
@@ -116,16 +116,16 @@ public class TestLinkDbMerger extends Te
merger.merge(output, new Path[]{linkdb1, linkdb2}, false, false);
LOG.fine("* reading linkdb: " + output);
reader = new LinkDbReader(conf, output);
- Iterator it = expected.keySet().iterator();
+ Iterator<String> it = expected.keySet().iterator();
while (it.hasNext()) {
- String url = (String)it.next();
+ String url = it.next();
LOG.fine("url=" + url);
- String[] vals = (String[])expected.get(url);
+ String[] vals = expected.get(url);
Inlinks inlinks = reader.getInlinks(new Text(url));
// may not be null
assertNotNull(inlinks);
- ArrayList links = new ArrayList();
- Iterator it2 = inlinks.iterator();
+ ArrayList<String> links = new ArrayList<String>();
+ Iterator<?> it2 = inlinks.iterator();
while (it2.hasNext()) {
Inlink in = (Inlink)it2.next();
links.add(in.getFromUrl());
@@ -139,15 +139,15 @@ public class TestLinkDbMerger extends Te
fs.delete(testDir, true);
}
- private void createLinkDb(Configuration config, FileSystem fs, Path linkdb, TreeMap init) throws Exception {
+ private void createLinkDb(Configuration config, FileSystem fs, Path linkdb, TreeMap<String, String[]> init) throws Exception {
LOG.fine("* creating linkdb: " + linkdb);
Path dir = new Path(linkdb, LinkDb.CURRENT_NAME);
MapFile.Writer writer = new MapFile.Writer(config, fs, new Path(dir, "part-00000").toString(), Text.class, Inlinks.class);
- Iterator it = init.keySet().iterator();
+ Iterator<String> it = init.keySet().iterator();
while (it.hasNext()) {
- String key = (String)it.next();
+ String key = it.next();
Inlinks inlinks = new Inlinks();
- String[] vals = (String[])init.get(key);
+ String[] vals = init.get(key);
for (int i = 0; i < vals.length; i++) {
Inlink in = new Inlink(vals[i], vals[i]);
inlinks.add(in);
Modified: nutch/trunk/src/test/org/apache/nutch/plugin/TestPluginSystem.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/plugin/TestPluginSystem.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/test/org/apache/nutch/plugin/TestPluginSystem.java (original)
+++ nutch/trunk/src/test/org/apache/nutch/plugin/TestPluginSystem.java Tue May 21 01:19:26 2013
@@ -35,13 +35,11 @@ import org.apache.nutch.util.NutchJob;
/**
* Unit tests for the plugin system
- *
- * @author joa23
*/
public class TestPluginSystem extends TestCase {
private int fPluginCount;
- private LinkedList fFolders = new LinkedList();
+ private LinkedList<File> fFolders = new LinkedList<File>();
private Configuration conf ;
private PluginRepository repository;
@@ -62,11 +60,10 @@ public class TestPluginSystem extends Te
*/
protected void tearDown() throws Exception {
for (int i = 0; i < fFolders.size(); i++) {
- File folder = (File) fFolders.get(i);
+ File folder = fFolders.get(i);
delete(folder);
folder.delete();
}
-
}
/**
Modified: nutch/trunk/src/test/org/apache/nutch/segment/TestSegmentMerger.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/segment/TestSegmentMerger.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/test/org/apache/nutch/segment/TestSegmentMerger.java (original)
+++ nutch/trunk/src/test/org/apache/nutch/segment/TestSegmentMerger.java Tue May 21 01:19:26 2013
@@ -42,11 +42,11 @@ public class TestSegmentMerger extends T
public void setUp() throws Exception {
conf = NutchConfiguration.create();
fs = FileSystem.get(conf);
- long blkSize = fs.getDefaultBlockSize();
testDir = new Path(conf.get("hadoop.tmp.dir"), "merge-" + System.currentTimeMillis());
seg1 = new Path(testDir, "seg1");
seg2 = new Path(testDir, "seg2");
out = new Path(testDir, "out");
+
// create large parse-text segments
System.err.println("Creating large segment 1...");
DecimalFormat df = new DecimalFormat("0000000");
@@ -55,6 +55,9 @@ public class TestSegmentMerger extends T
MapFile.Writer w = new MapFile.Writer(conf, fs, ptPath.toString(), Text.class, ParseText.class);
long curSize = 0;
countSeg1 = 0;
+ FileStatus fileStatus = fs.getFileStatus(ptPath);
+ long blkSize = fileStatus.getBlockSize();
+
while (curSize < blkSize * 2) {
k.set("seg1-" + df.format(countSeg1));
w.append(k, new ParseText("seg1 text " + countSeg1));