You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nutch.apache.org by te...@apache.org on 2013/05/21 03:19:27 UTC

svn commit: r1484634 [2/2] - in /nutch/trunk: ./ src/java/org/apache/nutch/crawl/ src/java/org/apache/nutch/fetcher/ src/java/org/apache/nutch/indexer/ src/java/org/apache/nutch/metadata/ src/java/org/apache/nutch/net/ src/java/org/apache/nutch/parse/ ...

Modified: nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java (original)
+++ nutch/trunk/src/plugin/parse-swf/src/java/org/apache/nutch/parse/swf/SWFParser.java Tue May 21 01:19:26 2013
@@ -42,8 +42,6 @@ import com.anotherbigidea.io.InStream;
 /**
  * Parser for Flash SWF files. Loosely based on the sample in JavaSWF
  * distribution.
- * 
- * @author Andrzej Bialecki
  */
 public class SWFParser implements Parser {
   public static final Logger LOG = LoggerFactory.getLogger("org.apache.nutch.parse.swf");
@@ -63,7 +61,7 @@ public class SWFParser implements Parser
   public ParseResult getParse(Content content) {
 
     String text = null;
-    Vector outlinks = new Vector();
+    Vector<Outlink> outlinks = new Vector<Outlink>();
 
     try {
 
@@ -120,6 +118,7 @@ public class SWFParser implements Parser
 
     byte[] buf = new byte[in.available()];
     in.read(buf);
+    in.close();
     SWFParser parser = new SWFParser();
     ParseResult parseResult = parser.getParse(new Content("file:" + args[0], "file:" + args[0],
                                           buf, "application/x-shockwave-flash",
@@ -153,13 +152,13 @@ class ExtractText extends SWFTagTypesImp
    * character codes for the correspnding font glyphs (An empty array denotes a
    * System Font).
    */
-  protected HashMap fontCodes = new HashMap();
+  protected HashMap<Integer, int[]> fontCodes = new HashMap<Integer, int[]>();
 
-  public ArrayList strings = new ArrayList();
+  public ArrayList<String> strings = new ArrayList<String>();
 
-  public HashSet actionStrings = new HashSet();
+  public HashSet<String> actionStrings = new HashSet<String>();
 
-  public ArrayList urls = new ArrayList();
+  public ArrayList<String> urls = new ArrayList<String>();
 
   public ExtractText() {
     super(null);
@@ -167,7 +166,7 @@ class ExtractText extends SWFTagTypesImp
 
   public String getText() {
     StringBuffer res = new StringBuffer();
-    Iterator it = strings.iterator();
+    Iterator<String> it = strings.iterator();
     while (it.hasNext()) {
       if (res.length() > 0) res.append(' ');
       res.append(it.next());
@@ -189,7 +188,7 @@ class ExtractText extends SWFTagTypesImp
   public String[] getUrls() {
     String[] res = new String[urls.size()];
     int i = 0;
-    Iterator it = urls.iterator();
+    Iterator<String> it = urls.iterator();
     while (it.hasNext()) {
       res[i] = (String) it.next();
       i++;
@@ -350,26 +349,23 @@ class ExtractText extends SWFTagTypesImp
  * ActionScript parser. This parser tries to extract free text embedded inside
  * the script, but without polluting it too much with names of variables,
  * methods, etc. Not ideal, but it works.
- * 
- * @author Andrzej Bialecki
  */
 class NutchSWFActions extends SWFActionBlockImpl implements SWFActions {
-  private HashSet strings = null;
+  private HashSet<String> strings = null;
 
-  private ArrayList urls = null;
+  private ArrayList<String> urls = null;
 
   String[] dict = null;
 
-  Stack stack = null;
+  Stack<Object> stack = null;
 
-  public NutchSWFActions(HashSet strings, ArrayList urls) {
+  public NutchSWFActions(HashSet<String> strings, ArrayList<String> urls) {
     this.strings = strings;
     this.urls = urls;
     stack = new SmallStack(100, strings);
   }
 
   public void lookupTable(String[] values) throws IOException {
-    // System.out.println("-lookupTable: " + values.length);
     for (int i = 0; i < values.length; i++) {
       if (!strings.contains(values[i])) strings.add(values[i]);
     }
@@ -378,7 +374,6 @@ class NutchSWFActions extends SWFActionB
   }
 
   public void defineLocal() throws IOException {
-    // System.out.println("-defineLocal");
     stack.pop();
     super.defineLocal();
   }
@@ -398,69 +393,58 @@ class NutchSWFActions extends SWFActionB
   }
 
   public SWFActionBlock.TryCatchFinally _try(String var) throws IOException {
-    // System.out.println("_try: var=" + var);
     // stack.push(var);
     strings.remove(var);
     return super._try(var);
   }
 
   public void comment(String var) throws IOException {
-    // System.out.println("-comment: var=" + var);
     // stack.push(var);
     strings.remove(var);
     super.comment(var);
   }
 
   public void goToFrame(String var) throws IOException {
-    // System.out.println("-goToFrame: var=" + var);
     stack.push(var);
     strings.remove(var);
     super.gotoFrame(var);
   }
 
   public void ifJump(String var) throws IOException {
-    // System.out.println("-ifJump: var=" + var);
     strings.remove(var);
     super.ifJump(var);
   }
 
   public void jump(String var) throws IOException {
-    // System.out.println("-jump: var=" + var);
     strings.remove(var);
     super.jump(var);
   }
 
   public void jumpLabel(String var) throws IOException {
-    // System.out.println("-jumpLabel: var=" + var);
     strings.remove(var);
     super.jumpLabel(var);
   }
 
   public void lookup(int var) throws IOException {
-    // System.out.println("-lookup: var=" + var);
     if (dict != null && var >= 0 && var < dict.length) {
-      // System.out.println(" push " + dict[var]);
       stack.push(dict[var]);
     }
     super.lookup(var);
   }
 
   public void push(String var) throws IOException {
-    // System.out.println("-push: var=" + var);
     stack.push(var);
     strings.remove(var);
     super.push(var);
   }
 
   public void setTarget(String var) throws IOException {
-    // System.out.println("-setTarget: var=" + var);
     stack.push(var);
     strings.remove(var);
     super.setTarget(var);
   }
 
   public SWFActionBlock startFunction(String var, String[] params) throws IOException {
-    // System.out.println("-startFunction1: var=" + var);
     stack.push(var);
     strings.remove(var);
     if (params != null) {
@@ -472,7 +456,6 @@ class NutchSWFActions extends SWFActionB
   }
 
   public SWFActionBlock startFunction2(String var, int arg1, int arg2, String[] params, int[] arg3) throws IOException {
-    // System.out.println("-startFunction2: var=" + var);
     stack.push(var);
     strings.remove(var);
     if (params != null) {
@@ -484,74 +467,61 @@ class NutchSWFActions extends SWFActionB
   }
 
   public void waitForFrame(int num, String var) throws IOException {
-    // System.out.println("-waitForFrame: var=" + var);
     stack.push(var);
     strings.remove(var);
     super.waitForFrame(num, var);
   }
 
   public void waitForFrame(String var) throws IOException {
-    // System.out.println("-waitForFrame: var=" + var);
     stack.push(var);
     strings.remove(var);
     super.waitForFrame(var);
   }
 
   public void done() throws IOException {
-    // System.out.println("-done");
     while (stack.size() > 0) {
       strings.remove(stack.pop());
     }
   }
 
   public SWFActionBlock start(int arg0, int arg1) throws IOException {
-    // System.out.println("-start");
     return this;
   }
 
   public SWFActionBlock start(int arg0) throws IOException {
-    // System.out.println("-start");
     return this;
   }
 
   public void add() throws IOException {
-    // System.out.println("-add");
     super.add();
   }
 
   public void asciiToChar() throws IOException {
-    // System.out.println("-asciitochar");
     super.asciiToChar();
   }
 
   public void asciiToCharMB() throws IOException {
-    // System.out.println("-asciitocharMB");
     super.asciiToCharMB();
   }
 
   public void push(int var) throws IOException {
-    // System.out.println("-push(int)");
     if (dict != null && var >= 0 && var < dict.length) {
-      // System.out.println(" push " + dict[var]);
       stack.push(dict[var]);
     }
     super.push(var);
   }
 
   public void callFunction() throws IOException {
-    // System.out.println("-callFunction");
     strings.remove(stack.pop());
     super.callFunction();
   }
 
   public void callMethod() throws IOException {
-    // System.out.println("-callMethod");
     strings.remove(stack.pop());
     super.callMethod();
   }
 
   public void getMember() throws IOException {
-    // System.out.println("-getMember");
     // 0: name
     String val = (String) stack.pop();
     strings.remove(val);
@@ -560,116 +530,97 @@ class NutchSWFActions extends SWFActionB
 
   public void setMember() throws IOException {
     // 0: value -1: name
-    String val = (String) stack.pop();
+    stack.pop(); // value
     String name = (String) stack.pop();
-    // System.out.println("-setMember: name=" + name + ", val=" + val);
     strings.remove(name);
     super.setMember();
   }
 
   public void setProperty() throws IOException {
-    // System.out.println("-setProperty");
     super.setProperty();
   }
 
   public void setVariable() throws IOException {
-    // System.out.println("-setVariable");
     super.setVariable();
   }
 
   public void call() throws IOException {
-    // System.out.println("-call");
     strings.remove(stack.pop());
     super.call();
   }
 
   public void setTarget() throws IOException {
-    // System.out.println("-setTarget");
     strings.remove(stack.pop());
     super.setTarget();
   }
 
   public void pop() throws IOException {
-    // System.out.println("-pop");
     strings.remove(stack.pop());
     super.pop();
   }
 
   public void push(boolean arg0) throws IOException {
-    // System.out.println("-push(b)");
     stack.push("" + arg0);
     super.push(arg0);
   }
 
   public void push(double arg0) throws IOException {
-    // System.out.println("-push(d)");
     stack.push("" + arg0);
     super.push(arg0);
   }
 
   public void push(float arg0) throws IOException {
-    // System.out.println("-push(f)");
     stack.push("" + arg0);
     super.push(arg0);
   }
 
   public void pushNull() throws IOException {
-    // System.out.println("-push(null)");
     stack.push("");
     super.pushNull();
   }
 
   public void pushRegister(int arg0) throws IOException {
-    // System.out.println("-push(reg)");
     stack.push("" + arg0);
     super.pushRegister(arg0);
   }
 
   public void pushUndefined() throws IOException {
-    // System.out.println("-push(undef)");
     stack.push("???");
     super.pushUndefined();
   }
 
   public void getProperty() throws IOException {
-    // System.out.println("-getProperty");
     stack.pop();
     super.getProperty();
   }
 
   public void getVariable() throws IOException {
-    // System.out.println("-getVariable");
     strings.remove(stack.pop());
     super.getVariable();
   }
 
   public void gotoFrame(boolean arg0) throws IOException {
-    // System.out.println("-gotoFrame(b)");
     stack.push("" + arg0);
     super.gotoFrame(arg0);
   }
 
   public void gotoFrame(int arg0) throws IOException {
-    // System.out.println("-gotoFrame(int)");
     stack.push("" + arg0);
     super.gotoFrame(arg0);
   }
 
   public void gotoFrame(String arg0) throws IOException {
-    // System.out.println("-gotoFrame(string)");
     stack.push("" + arg0);
     strings.remove(arg0);
     super.gotoFrame(arg0);
   }
 
   public void newObject() throws IOException {
-    // System.out.println("-newObject");
     stack.pop();
     super.newObject();
   }
 
   public SWFActionBlock startWith() throws IOException {
-    // System.out.println("-startWith");
     return this;
   }
 
@@ -678,13 +629,15 @@ class NutchSWFActions extends SWFActionB
 /*
  * Small bottom-less stack.
  */
-class SmallStack extends Stack {
+class SmallStack extends Stack<Object> {
+
+  private static final long serialVersionUID = 1L;
 
   private int maxSize;
 
-  private HashSet strings = null;
+  private HashSet<String> strings = null;
 
-  public SmallStack(int maxSize, HashSet strings) {
+  public SmallStack(int maxSize, HashSet<String> strings) {
     this.maxSize = maxSize;
     this.strings = strings;
   }

Modified: nutch/trunk/src/plugin/parse-swf/src/test/org/apache/nutch/parse/swf/TestSWFParser.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-swf/src/test/org/apache/nutch/parse/swf/TestSWFParser.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/plugin/parse-swf/src/test/org/apache/nutch/parse/swf/TestSWFParser.java (original)
+++ nutch/trunk/src/plugin/parse-swf/src/test/org/apache/nutch/parse/swf/TestSWFParser.java Tue May 21 01:19:26 2013
@@ -32,13 +32,12 @@ import org.apache.nutch.parse.Parse;
 import org.apache.nutch.parse.ParseException;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.nutch.util.NutchConfiguration;
+import org.mortbay.log.Log;
 
 import junit.framework.TestCase;
 
 /** 
  * Unit tests for SWFParser.
- *
- * @author Andrzej Bialecki
  */
 public class TestSWFParser extends TestCase {
 
@@ -48,7 +47,6 @@ public class TestSWFParser extends TestC
   
   private String[] sampleFiles = new String[]{"test1.swf", "test2.swf", "test3.swf"};
   private String[] sampleTexts = new String[]{"test1.txt", "test2.txt", "test3.txt"};
-  private String[] texts = new String[sampleTexts.length];
 
   public TestSWFParser(String name) { 
     super(name);
@@ -94,5 +92,4 @@ public class TestSWFParser extends TestC
       assertTrue(sampleTexts[i].equals(text));
     }
   }
-
 }

Modified: nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/DOMBuilder.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/DOMBuilder.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/DOMBuilder.java (original)
+++ nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/DOMBuilder.java Tue May 21 01:19:26 2013
@@ -58,7 +58,7 @@ class DOMBuilder
   public DocumentFragment m_docFrag = null;
 
   /** Vector of element nodes          */
-  protected Stack m_elemStack = new Stack();
+  protected Stack<Element> m_elemStack = new Stack<Element>();
 
   /**
    * DOMBuilder instance constructor... it will add the DOM nodes

Modified: nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/TikaParser.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/TikaParser.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/TikaParser.java (original)
+++ nutch/trunk/src/plugin/parse-tika/src/java/org/apache/nutch/parse/tika/TikaParser.java Tue May 21 01:19:26 2013
@@ -59,7 +59,8 @@ public class TikaParser implements org.a
 	private HtmlParseFilters htmlParseFilters;
 	private String cachingPolicy;
 
-	public ParseResult getParse(Content content) {
+	@SuppressWarnings("deprecation")
+  public ParseResult getParse(Content content) {
 		String mimeType = content.getContentType();
 
 		URL base;

Modified: nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java (original)
+++ nutch/trunk/src/plugin/parse-zip/src/java/org/apache/nutch/parse/zip/ZipParser.java Tue May 21 01:19:26 2013
@@ -18,15 +18,12 @@
 package org.apache.nutch.parse.zip;
 
 import java.io.ByteArrayInputStream;
-import java.io.InputStream;
-import java.util.Properties;
 import java.util.ArrayList;
 import java.util.List;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import org.apache.nutch.metadata.Metadata;
 import org.apache.nutch.net.protocols.Response;
 import org.apache.nutch.parse.Outlink;
 import org.apache.nutch.parse.ParseData;
@@ -40,8 +37,6 @@ import org.apache.hadoop.conf.Configurat
 /**
  * ZipParser class based on MSPowerPointParser class by Stephan Strittmatter.
  * Nutch parse plugin for zip files - Content Type : application/zip
- * 
- * @author Rohit Kulkarni & Ashish Vaidya
  */
 public class ZipParser implements Parser {
 
@@ -57,17 +52,13 @@ public class ZipParser implements Parser
     String resultText = null;
     String resultTitle = null;
     Outlink[] outlinks = null;
-    List outLinksList = new ArrayList();
-    Properties properties = null;
+    List<Outlink> outLinksList = new ArrayList<Outlink>();
 
     try {
       final String contentLen = content.getMetadata().get(Response.CONTENT_LENGTH);
       final int len = Integer.parseInt(contentLen);
       if (LOG.isDebugEnabled()) { LOG.debug("ziplen: " + len); }
       final byte[] contentInBytes = content.getContent();
-      final ByteArrayInputStream bainput = new ByteArrayInputStream(
-          contentInBytes);
-      final InputStream input = bainput;
 
       if (contentLen != null && contentInBytes.length != len) {
         return new ParseStatus(ParseStatus.FAILED,

Modified: nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java (original)
+++ nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Client.java Tue May 21 01:19:26 2013
@@ -77,9 +77,9 @@ public class Client extends FTP
     private int __dataTimeout;
     private int __passivePort;
     private String __passiveHost;
-    private int __fileType, __fileFormat;
+//    private int __fileType, __fileFormat;
     private boolean __remoteVerificationEnabled;
-    private FTPFileEntryParser __entryParser;
+//    private FTPFileEntryParser __entryParser;
     private String __systemName;
 
     // constructor
@@ -95,10 +95,10 @@ public class Client extends FTP
     {
         __passiveHost        = null;
         __passivePort        = -1;
-        __fileType           = FTP.ASCII_FILE_TYPE;
-        __fileFormat         = FTP.NON_PRINT_TEXT_FORMAT;
         __systemName         = null;
-        __entryParser        = null;
+//        __fileType           = FTP.ASCII_FILE_TYPE;
+//        __fileFormat         = FTP.NON_PRINT_TEXT_FORMAT;
+//        __entryParser        = null;
     }
 
     // parse reply for pass()
@@ -315,7 +315,7 @@ public class Client extends FTP
     }
 
     // retrieve list reply for path
-    public void retrieveList(String path, List entries, int limit,
+    public void retrieveList(String path, List<FTPFile> entries, int limit,
       FTPFileEntryParser parser)
       throws IOException,
         FtpExceptionCanNotHaveDataConnection,
@@ -331,7 +331,7 @@ public class Client extends FTP
           new BufferedReader(new InputStreamReader(socket.getInputStream()));
 
       // force-close data channel socket, when download limit is reached
-      boolean mandatory_close = false;
+//      boolean mandatory_close = false;
 
       //List entries = new LinkedList();
       int count = 0;
@@ -348,7 +348,7 @@ public class Client extends FTP
         // impose download limit if limit >= 0, otherwise no limit
         // here, cut off is up to the line when total bytes is just over limit
         if (limit >= 0 && count > limit) {
-          mandatory_close = true;
+//          mandatory_close = true;
           break;
         }
         line = parser.readNextEntry(reader);
@@ -403,7 +403,7 @@ public class Client extends FTP
       // fixme, should we instruct server here for binary file type?
 
       // force-close data channel socket
-      boolean mandatory_close = false;
+      // boolean mandatory_close = false;
 
       int len; int count = 0;
       byte[] buf =
@@ -414,7 +414,7 @@ public class Client extends FTP
         // here, cut off is exactly of limit bytes
         if (limit >= 0 && count > limit) {
           os.write(buf,0,len-(count-limit));
-          mandatory_close = true;
+       //   mandatory_close = true;
           break;
         }
         os.write(buf,0,len);
@@ -502,8 +502,8 @@ public class Client extends FTP
     {
         if (FTPReply.isPositiveCompletion(type(fileType)))
         {
-            __fileType = fileType;
-            __fileFormat = FTP.NON_PRINT_TEXT_FORMAT;
+/*            __fileType = fileType;
+            __fileFormat = FTP.NON_PRINT_TEXT_FORMAT;*/
             return true;
         }
         return false;

Modified: nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java (original)
+++ nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/Ftp.java Tue May 21 01:19:26 2013
@@ -55,7 +55,7 @@ public class Ftp implements Protocol {
 
   public static final Logger LOG = LoggerFactory.getLogger(Ftp.class);
 
-  static final int BUFFER_SIZE = 16384; // 16*1024 = 16384
+  private static final int BUFFER_SIZE = 16384; // 16*1024 = 16384
 
   static final int MAX_REDIRECTS = 5;
 
@@ -257,5 +257,9 @@ public class Ftp implements Protocol {
   public BaseRobotRules getRobotRules(Text url, CrawlDatum datum) {
     return RobotRulesParser.EMPTY_RULES;
   }
+
+  public int getBufferSize() {
+    return BUFFER_SIZE;
+  }
 }
 

Modified: nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java (original)
+++ nutch/trunk/src/plugin/protocol-ftp/src/java/org/apache/nutch/protocol/ftp/FtpResponse.java Tue May 21 01:19:26 2013
@@ -17,11 +17,9 @@
 
 package org.apache.nutch.protocol.ftp;
 
-
 import org.apache.commons.net.ftp.FTP;
 import org.apache.commons.net.ftp.FTPFile;
 import org.apache.commons.net.ftp.FTPReply;
-
 import org.apache.commons.net.ftp.parser.DefaultFTPFileEntryParserFactory;
 import org.apache.commons.net.ftp.parser.ParserInitializationException;
 
@@ -42,8 +40,7 @@ import java.util.LinkedList;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 
-
-/************************************
+/**
  * FtpResponse.java mimics ftp replies as http response.
  * It tries its best to follow http's way for headers, response codes
  * as well as exceptions.
@@ -53,9 +50,7 @@ import java.io.IOException;
  * and some important commons-net exceptions passed by Client.java
  * must have been properly dealt with. They'd better not be leaked
  * to the caller of this class.
- *
- * @author John Xing
- ***********************************/
+ */
 public class FtpResponse {
 
   private String orig;
@@ -146,7 +141,7 @@ public class FtpResponse {
         // follow ftp talk?
         if (ftp.followTalk)
           ftp.client.addProtocolCommandListener(
-            new PrintCommandListener(ftp.LOG));
+            new PrintCommandListener(Ftp.LOG));
       }
 
       // quit from previous site if at a different site now
@@ -284,8 +279,8 @@ public class FtpResponse {
       }
       
     } catch (Exception e) {
-      if (ftp.LOG.isWarnEnabled()) {
-        ftp.LOG.warn("Error: ", e);
+      if (Ftp.LOG.isWarnEnabled()) {
+        Ftp.LOG.warn("Error: ", e);
       }
       // for any un-foreseen exception (run time exception or not),
       // do ultimate clean and leave ftp.client for garbage collection
@@ -312,11 +307,11 @@ public class FtpResponse {
     throws IOException {
 
     ByteArrayOutputStream os = null;
-    List list = null;
+    List<FTPFile> list = null;
 
     try {
       // first get its possible attributes
-      list = new LinkedList();
+      list = new LinkedList<FTPFile>();
       ftp.client.retrieveList(path, list, ftp.maxContentLength, ftp.parser);
 
       FTPFile ftpFile = (FTPFile) list.get(0);
@@ -329,7 +324,7 @@ public class FtpResponse {
         code = 304;
         return;
       }
-      os = new ByteArrayOutputStream(ftp.BUFFER_SIZE);
+      os = new ByteArrayOutputStream(ftp.getBufferSize());
       ftp.client.retrieveFile(path, os, ftp.maxContentLength);
 
       this.content = os.toByteArray();
@@ -414,7 +409,7 @@ public class FtpResponse {
   // get ftp dir list as http response
   private void getDirAsHttpResponse(String path, long lastModified)
     throws IOException {
-    List list = new LinkedList();
+    List<FTPFile> list = new LinkedList<FTPFile>();
 
     try {
 
@@ -482,7 +477,7 @@ public class FtpResponse {
   }
 
   // generate html page from ftp dir list
-  private byte[] list2html(List list, String path, boolean includeDotDot) {
+  private byte[] list2html(List<FTPFile> list, String path, boolean includeDotDot) {
 
     //StringBuffer x = new StringBuffer("<!doctype html public \"-//ietf//dtd html//en\"><html><head>");
     StringBuffer x = new StringBuffer("<html><head>");

Modified: nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java (original)
+++ nutch/trunk/src/plugin/protocol-httpclient/src/java/org/apache/nutch/protocol/httpclient/Http.java Tue May 21 01:19:26 2013
@@ -42,6 +42,8 @@ import org.apache.commons.httpclient.NTC
 import org.apache.commons.httpclient.auth.AuthScope;
 import org.apache.commons.httpclient.params.HttpConnectionManagerParams;
 import org.apache.commons.httpclient.protocol.Protocol;
+import org.apache.commons.httpclient.protocol.ProtocolSocketFactory;
+import org.apache.commons.httpclient.protocol.SSLProtocolSocketFactory;
 
 // Nutch imports
 import org.apache.nutch.crawl.CrawlDatum;
@@ -158,8 +160,8 @@ public class Http extends HttpBase {
   private void configureClient() {
 
     // Set up an HTTPS socket factory that accepts self-signed certs.
-    Protocol https = new Protocol("https",
-        new DummySSLProtocolSocketFactory(), 443);
+    ProtocolSocketFactory factory = new SSLProtocolSocketFactory();
+    Protocol https = new Protocol("https", factory, 443);
     Protocol.registerProtocol("https", https);
 
     HttpConnectionManagerParams params = connectionManager.getParams();
@@ -174,7 +176,7 @@ public class Http extends HttpBase {
     client.getParams().setConnectionManagerTimeout(timeout);
 
     HostConfiguration hostConf = client.getHostConfiguration();
-    ArrayList headers = new ArrayList();
+    ArrayList<Header> headers = new ArrayList<Header>();
     // Set the User Agent in the header
     headers.add(new Header("User-Agent", userAgent));
     // prefer English
@@ -199,7 +201,7 @@ public class Http extends HttpBase {
 
         NTCredentials proxyCredentials = new NTCredentials(
             this.proxyUsername, this.proxyPassword,
-            this.agentHost, this.proxyRealm);
+            Http.agentHost, this.proxyRealm);
 
         client.getState().setProxyCredentials(
             proxyAuthScope, proxyCredentials);

Modified: nutch/trunk/src/plugin/protocol-httpclient/src/test/org/apache/nutch/protocol/httpclient/TestProtocolHttpClient.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/protocol-httpclient/src/test/org/apache/nutch/protocol/httpclient/TestProtocolHttpClient.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/plugin/protocol-httpclient/src/test/org/apache/nutch/protocol/httpclient/TestProtocolHttpClient.java (original)
+++ nutch/trunk/src/plugin/protocol-httpclient/src/test/org/apache/nutch/protocol/httpclient/TestProtocolHttpClient.java Tue May 21 01:19:26 2013
@@ -23,7 +23,6 @@ import junit.framework.TestCase;
 import org.mortbay.jetty.Server;
 import org.mortbay.jetty.bio.SocketConnector;
 import org.mortbay.jetty.handler.ContextHandler;
-import org.mortbay.jetty.handler.ResourceHandler;
 import org.mortbay.jetty.servlet.ServletHandler;
 import org.mortbay.jetty.servlet.SessionHandler;
 import org.apache.hadoop.conf.Configuration;
@@ -32,8 +31,6 @@ import org.apache.nutch.net.protocols.Re
 
 /**
  * Test cases for protocol-httpclient.
- *
- * @author Susam Pal
  */
 public class TestProtocolHttpClient extends TestCase {
 

Modified: nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java (original)
+++ nutch/trunk/src/plugin/urlfilter-prefix/src/java/org/apache/nutch/urlfilter/prefix/PrefixURLFilter.java Tue May 21 01:19:26 2013
@@ -14,9 +14,6 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-
-// $Id$
-
 package org.apache.nutch.urlfilter.prefix;
 
 import org.slf4j.Logger;
@@ -79,7 +76,7 @@ public class PrefixURLFilter implements 
     throws IOException {
     
     BufferedReader in=new BufferedReader(reader);
-    List urlprefixes = new ArrayList();
+    List<String> urlprefixes = new ArrayList<String>();
     String line;
 
     while((line=in.readLine())!=null) {

Modified: nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java (original)
+++ nutch/trunk/src/plugin/urlfilter-suffix/src/java/org/apache/nutch/urlfilter/suffix/SuffixURLFilter.java Tue May 21 01:19:26 2013
@@ -181,7 +181,7 @@ public class SuffixURLFilter implements 
       return;
     }
     BufferedReader in = new BufferedReader(reader);
-    List aSuffixes = new ArrayList();
+    List<String> aSuffixes = new ArrayList<String>();
     boolean allow = false;
     boolean ignore = false;
     String line;

Modified: nutch/trunk/src/test/org/apache/nutch/crawl/TestCrawlDbMerger.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/crawl/TestCrawlDbMerger.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/test/org/apache/nutch/crawl/TestCrawlDbMerger.java (original)
+++ nutch/trunk/src/test/org/apache/nutch/crawl/TestCrawlDbMerger.java Tue May 21 01:19:26 2013
@@ -44,9 +44,9 @@ public class TestCrawlDbMerger extends T
           url21
   };
   
-  TreeSet init1 = new TreeSet();
-  TreeSet init2 = new TreeSet();
-  HashMap expected = new HashMap();
+  TreeSet<String> init1 = new TreeSet<String>();
+  TreeSet<String> init2 = new TreeSet<String>();
+  HashMap<String, CrawlDatum> expected = new HashMap<String, CrawlDatum>();
   CrawlDatum cd1, cd2, cd3;
   Configuration conf;
   FileSystem fs;
@@ -83,6 +83,7 @@ public class TestCrawlDbMerger extends T
     fs.mkdirs(testDir);
   }
   
+  @SuppressWarnings("deprecation")
   public void tearDown() {
     try {
       if (fs.exists(testDir))
@@ -93,6 +94,7 @@ public class TestCrawlDbMerger extends T
     } catch (Exception e) { }
   }
 
+  @SuppressWarnings("deprecation")
   public void testMerge() throws Exception {
     Path crawldb1 = new Path(testDir, "crawldb1");
     Path crawldb2 = new Path(testDir, "crawldb2");
@@ -105,11 +107,11 @@ public class TestCrawlDbMerger extends T
     LOG.fine("* reading crawldb: " + output);
     reader = new CrawlDbReader();
     String crawlDb = output.toString();
-    Iterator it = expected.keySet().iterator();
+    Iterator<String> it = expected.keySet().iterator();
     while (it.hasNext()) {
-      String url = (String)it.next();
+      String url = it.next();
       LOG.fine("url=" + url);
-      CrawlDatum cd = (CrawlDatum)expected.get(url);
+      CrawlDatum cd = expected.get(url);
       CrawlDatum res = reader.get(crawlDb, url, conf);
       LOG.fine(" -> " + res);
       System.out.println("url=" + url);
@@ -123,13 +125,13 @@ public class TestCrawlDbMerger extends T
     fs.delete(testDir);
   }
   
-  private void createCrawlDb(Configuration config, FileSystem fs, Path crawldb, TreeSet init, CrawlDatum cd) throws Exception {
+  private void createCrawlDb(Configuration config, FileSystem fs, Path crawldb, TreeSet<String> init, CrawlDatum cd) throws Exception {
     LOG.fine("* creating crawldb: " + crawldb);
     Path dir = new Path(crawldb, CrawlDb.CURRENT_NAME);
     MapFile.Writer writer = new MapFile.Writer(config, fs, new Path(dir, "part-00000").toString(), Text.class, CrawlDatum.class);
-    Iterator it = init.iterator();
+    Iterator<String> it = init.iterator();
     while (it.hasNext()) {
-      String key = (String)it.next();
+      String key = it.next();
       writer.append(new Text(key), cd);
     }
     writer.close();

Modified: nutch/trunk/src/test/org/apache/nutch/crawl/TestLinkDbMerger.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/crawl/TestLinkDbMerger.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/test/org/apache/nutch/crawl/TestLinkDbMerger.java (original)
+++ nutch/trunk/src/test/org/apache/nutch/crawl/TestLinkDbMerger.java Tue May 21 01:19:26 2013
@@ -68,9 +68,9 @@ public class TestLinkDbMerger extends Te
   String[] urls20_expected = urls11_expected;
   String[] urls21_expected = urls21;
   
-  TreeMap init1 = new TreeMap();
-  TreeMap init2 = new TreeMap();
-  HashMap expected = new HashMap();
+  TreeMap<String, String[]> init1 = new TreeMap<String, String[]>();
+  TreeMap<String, String[]> init2 = new TreeMap<String, String[]>();
+  HashMap<String, String[]> expected = new HashMap<String, String[]>();
   Configuration conf;
   Path testDir;
   FileSystem fs;
@@ -116,16 +116,16 @@ public class TestLinkDbMerger extends Te
     merger.merge(output, new Path[]{linkdb1, linkdb2}, false, false);
     LOG.fine("* reading linkdb: " + output);
     reader = new LinkDbReader(conf, output);
-    Iterator it = expected.keySet().iterator();
+    Iterator<String> it = expected.keySet().iterator();
     while (it.hasNext()) {
-      String url = (String)it.next();
+      String url = it.next();
       LOG.fine("url=" + url);
-      String[] vals = (String[])expected.get(url);
+      String[] vals = expected.get(url);
       Inlinks inlinks = reader.getInlinks(new Text(url));
       // may not be null
       assertNotNull(inlinks);
-      ArrayList links = new ArrayList();
-      Iterator it2 = inlinks.iterator();
+      ArrayList<String> links = new ArrayList<String>();
+      Iterator<?> it2 = inlinks.iterator();
       while (it2.hasNext()) {
         Inlink in = (Inlink)it2.next();
         links.add(in.getFromUrl());
@@ -139,15 +139,15 @@ public class TestLinkDbMerger extends Te
     fs.delete(testDir, true);
   }
   
-  private void createLinkDb(Configuration config, FileSystem fs, Path linkdb, TreeMap init) throws Exception {
+  private void createLinkDb(Configuration config, FileSystem fs, Path linkdb, TreeMap<String, String[]> init) throws Exception {
     LOG.fine("* creating linkdb: " + linkdb);
     Path dir = new Path(linkdb, LinkDb.CURRENT_NAME);
     MapFile.Writer writer = new MapFile.Writer(config, fs, new Path(dir, "part-00000").toString(), Text.class, Inlinks.class);
-    Iterator it = init.keySet().iterator();
+    Iterator<String> it = init.keySet().iterator();
     while (it.hasNext()) {
-      String key = (String)it.next();
+      String key = it.next();
       Inlinks inlinks = new Inlinks();
-      String[] vals = (String[])init.get(key);
+      String[] vals = init.get(key);
       for (int i = 0; i < vals.length; i++) {
         Inlink in = new Inlink(vals[i], vals[i]);
         inlinks.add(in);

Modified: nutch/trunk/src/test/org/apache/nutch/plugin/TestPluginSystem.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/plugin/TestPluginSystem.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/test/org/apache/nutch/plugin/TestPluginSystem.java (original)
+++ nutch/trunk/src/test/org/apache/nutch/plugin/TestPluginSystem.java Tue May 21 01:19:26 2013
@@ -35,13 +35,11 @@ import org.apache.nutch.util.NutchJob;
 
 /**
  * Unit tests for the plugin system
- * 
- * @author joa23
  */
 public class TestPluginSystem extends TestCase {
     private int fPluginCount;
 
-    private LinkedList fFolders = new LinkedList();
+    private LinkedList<File> fFolders = new LinkedList<File>();
     private Configuration conf ;
     private PluginRepository repository;
 
@@ -62,11 +60,10 @@ public class TestPluginSystem extends Te
      */
     protected void tearDown() throws Exception {
         for (int i = 0; i < fFolders.size(); i++) {
-            File folder = (File) fFolders.get(i);
+            File folder = fFolders.get(i);
             delete(folder);
             folder.delete();
         }
-
     }
 
     /**

Modified: nutch/trunk/src/test/org/apache/nutch/segment/TestSegmentMerger.java
URL: http://svn.apache.org/viewvc/nutch/trunk/src/test/org/apache/nutch/segment/TestSegmentMerger.java?rev=1484634&r1=1484633&r2=1484634&view=diff
==============================================================================
--- nutch/trunk/src/test/org/apache/nutch/segment/TestSegmentMerger.java (original)
+++ nutch/trunk/src/test/org/apache/nutch/segment/TestSegmentMerger.java Tue May 21 01:19:26 2013
@@ -42,11 +42,11 @@ public class TestSegmentMerger extends T
   public void setUp() throws Exception {
     conf = NutchConfiguration.create();
     fs = FileSystem.get(conf);
-    long blkSize = fs.getDefaultBlockSize();
     testDir = new Path(conf.get("hadoop.tmp.dir"), "merge-" + System.currentTimeMillis());
     seg1 = new Path(testDir, "seg1");
     seg2 = new Path(testDir, "seg2");
     out = new Path(testDir, "out");
+
     // create large parse-text segments
     System.err.println("Creating large segment 1...");
     DecimalFormat df = new DecimalFormat("0000000");
@@ -55,6 +55,9 @@ public class TestSegmentMerger extends T
     MapFile.Writer w = new MapFile.Writer(conf, fs, ptPath.toString(), Text.class, ParseText.class);
     long curSize = 0;
     countSeg1 = 0;
+    FileStatus fileStatus = fs.getFileStatus(ptPath);
+    long blkSize = fileStatus.getBlockSize();
+    
     while (curSize < blkSize * 2) {
       k.set("seg1-" + df.format(countSeg1));
       w.append(k, new ParseText("seg1 text " + countSeg1));