You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lenya.apache.org by mi...@apache.org on 2004/03/01 06:32:11 UTC
cvs commit: cocoon-lenya/src/java/org/apache/lenya/search/crawler IterativeHTMLCrawler.java
michi 2004/02/29 21:32:11
Modified: src/java/org/apache/lenya/search/crawler
IterativeHTMLCrawler.java
Log:
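Logging and output modified: System.out/System.err messages replaced with logger calls, dumpHTDoc now copies the HTTP response to the file as raw bytes, and saveToFile is commented out.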
Revision Changes Path
1.22 +21 -10 cocoon-lenya/src/java/org/apache/lenya/search/crawler/IterativeHTMLCrawler.java
Index: IterativeHTMLCrawler.java
===================================================================
RCS file: /home/cvs/cocoon-lenya/src/java/org/apache/lenya/search/crawler/IterativeHTMLCrawler.java,v
retrieving revision 1.21
retrieving revision 1.22
diff -u -r1.21 -r1.22
--- IterativeHTMLCrawler.java 26 Feb 2004 00:39:12 -0000 1.21
+++ IterativeHTMLCrawler.java 1 Mar 2004 05:32:11 -0000 1.22
@@ -143,7 +143,7 @@
String currentURLPath = start.toString().substring(0, start.toString().lastIndexOf("/"));
try {
- log.info(".crawl(): Start crawling at: " + start);
+ log.info("Start crawling at: " + start);
if (addURL(start.getFile(), currentURLPath) != null) {
dumpHTDoc(start);
@@ -194,8 +194,7 @@
parent.mkdirs();
log.warn("Directory has been created: " + parent);
}
- java.io.PrintWriter out = new java.io.PrintWriter(new java.io.FileOutputStream(
- url_list_file));
+ java.io.PrintWriter out = new java.io.PrintWriter(new FileOutputStream(url_list_file));
for (int i = 0; i < urlsToCrawl.size(); i++) {
out.println("" + urlsToCrawl.elementAt(i));
@@ -471,6 +470,16 @@
HttpURLConnection httpConnection = (HttpURLConnection) url.openConnection();
java.io.InputStream in = httpConnection.getInputStream();
+
+ FileOutputStream out = new FileOutputStream(file);
+ byte[] buffer = new byte[1024];
+ int bytesRead = -1;
+ while ((bytesRead = in.read(buffer)) >= 0) {
+ out.write(buffer, 0, bytesRead);
+ }
+ out.close();
+
+/*
BufferedInputStream bin = new BufferedInputStream(in);
BufferedReader reader = new BufferedReader(new InputStreamReader(bin));
@@ -484,22 +493,24 @@
fw.close();
bin.close();
+*/
in.close();
httpConnection.disconnect();
- System.out.println(".dumpHTDoc(): INFO: URL dumped: " + url);
+ log.info("URL dumped: " + url + " (" + file + ")");
} catch (Exception e) {
- System.err.println(".dumpHTDoc(): ERROR: " + e);
- System.out.println(".dumpHTDoc(): ERROR: URL not dumped: " + url);
+ log.error("" + e);
+ log.error("URL not dumped: " + url);
}
} else {
- System.out.println(".dumpHTDoc(): INFO: URL not dumped: " + url);
+ log.info("URL not dumped: " + url);
}
}
/**
*
*/
+/*
public void saveToFile(String filename, byte[] bytes)
throws FileNotFoundException, IOException {
File file = new File(filename);
@@ -511,8 +522,7 @@
File parent = new File(file.getParent());
if (!parent.exists()) {
- System.out.println(".saveToFile(): Directory will be created: " +
- parent.getAbsolutePath());
+ log.warn("Directory will be created: " + parent.getAbsolutePath());
parent.mkdirs();
}
@@ -520,6 +530,7 @@
out.write(bytes);
out.close();
}
+*/
/**
* DOCUMENT ME!
---------------------------------------------------------------------
To unsubscribe, e-mail: lenya-cvs-unsubscribe@cocoon.apache.org
For additional commands, e-mail: lenya-cvs-help@cocoon.apache.org