You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lenya.apache.org by mi...@apache.org on 2004/03/01 06:32:11 UTC

cvs commit: cocoon-lenya/src/java/org/apache/lenya/search/crawler IterativeHTMLCrawler.java

michi       2004/02/29 21:32:11

  Modified:    src/java/org/apache/lenya/search/crawler
                        IterativeHTMLCrawler.java
  Log:
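  logging and output modified:
  - System.out/System.err messages replaced by log.info/log.warn/log.error calls
  - dumpHTDoc now copies the HTTP response to the file as raw bytes
    (the previous reader-based copy is commented out)
  - saveToFile commented out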
  
  Revision  Changes    Path
  1.22      +21 -10    cocoon-lenya/src/java/org/apache/lenya/search/crawler/IterativeHTMLCrawler.java
  
  Index: IterativeHTMLCrawler.java
  ===================================================================
  RCS file: /home/cvs/cocoon-lenya/src/java/org/apache/lenya/search/crawler/IterativeHTMLCrawler.java,v
  retrieving revision 1.21
  retrieving revision 1.22
  diff -u -r1.21 -r1.22
  --- IterativeHTMLCrawler.java	26 Feb 2004 00:39:12 -0000	1.21
  +++ IterativeHTMLCrawler.java	1 Mar 2004 05:32:11 -0000	1.22
  @@ -143,7 +143,7 @@
           String currentURLPath = start.toString().substring(0, start.toString().lastIndexOf("/"));
   
           try {
  -            log.info(".crawl(): Start crawling at: " + start);
  +            log.info("Start crawling at: " + start);
   
               if (addURL(start.getFile(), currentURLPath) != null) {
                   dumpHTDoc(start);
  @@ -194,8 +194,7 @@
                   parent.mkdirs();
                   log.warn("Directory has been created: " + parent);
               }
  -            java.io.PrintWriter out = new java.io.PrintWriter(new java.io.FileOutputStream(
  -                        url_list_file));
  +            java.io.PrintWriter out = new java.io.PrintWriter(new FileOutputStream(url_list_file));
   
               for (int i = 0; i < urlsToCrawl.size(); i++) {
                   out.println("" + urlsToCrawl.elementAt(i));
  @@ -471,6 +470,16 @@
   
                   HttpURLConnection httpConnection = (HttpURLConnection) url.openConnection();
                   java.io.InputStream in = httpConnection.getInputStream();
  +
  +		FileOutputStream out = new FileOutputStream(file);
  +                byte[] buffer = new byte[1024];
  +                int bytesRead = -1;
  +                while ((bytesRead = in.read(buffer)) >= 0) {
  +                    out.write(buffer, 0, bytesRead);
  +                }
  +                out.close();
  +
  +/*
                   BufferedInputStream bin = new BufferedInputStream(in);
                   BufferedReader reader = new BufferedReader(new InputStreamReader(bin));
   
  @@ -484,22 +493,24 @@
                   fw.close();
   
                   bin.close();
  +*/
                   in.close();
                   httpConnection.disconnect();
   
  -                System.out.println(".dumpHTDoc(): INFO: URL dumped: " + url);
  +                log.info("URL dumped: " + url + " (" + file + ")");
               } catch (Exception e) {
  -                System.err.println(".dumpHTDoc(): ERROR: " + e);
  -                System.out.println(".dumpHTDoc(): ERROR: URL not dumped: " + url);
  +                log.error("" + e);
  +                log.error("URL not dumped: " + url);
               }
           } else {
  -            System.out.println(".dumpHTDoc(): INFO: URL not dumped: " + url);
  +            log.info("URL not dumped: " + url);
           }
       }
   
       /**
        *
        */
  +/*
       public void saveToFile(String filename, byte[] bytes)
           throws FileNotFoundException, IOException {
           File file = new File(filename);
  @@ -511,8 +522,7 @@
           File parent = new File(file.getParent());
   
           if (!parent.exists()) {
  -            System.out.println(".saveToFile(): Directory will be created: " +
  -                parent.getAbsolutePath());
  +            log.warn("Directory will be created: " + parent.getAbsolutePath());
               parent.mkdirs();
           }
   
  @@ -520,6 +530,7 @@
           out.write(bytes);
           out.close();
       }
  +*/
   
       /**
        * DOCUMENT ME!
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: lenya-cvs-unsubscribe@cocoon.apache.org
For additional commands, e-mail: lenya-cvs-help@cocoon.apache.org