You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@cocoon.apache.org by hu...@apache.org on 2002/06/30 18:36:11 UTC
cvs commit: xml-cocoon2/src/java/org/apache/cocoon/generation LinkStatusGenerator.java
huber 2002/06/30 09:36:11
Modified: src/java/org/apache/cocoon/generation
LinkStatusGenerator.java
Log:
Added more javadoc comments; fixed attribute generation so that the emitted
attribute values describe the url itself, not the url built for requesting its links
Revision Changes Path
1.2 +68 -78 xml-cocoon2/src/java/org/apache/cocoon/generation/LinkStatusGenerator.java
Index: LinkStatusGenerator.java
===================================================================
RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/generation/LinkStatusGenerator.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- LinkStatusGenerator.java 14 Jun 2002 16:19:14 -0000 1.1
+++ LinkStatusGenerator.java 30 Jun 2002 16:36:11 -0000 1.2
@@ -35,7 +35,8 @@
*
* @author Michael Homeijer
* @author Nicola Ken Barozzi (nicolaken@apache.org)
-*/
+ * @author Bernhard Huber (huber@apache.org)
+ */
public class LinkStatusGenerator extends ComposerGenerator implements Recyclable, Configurable {
/** The URI of the namespace of this generator. */
@@ -128,10 +129,8 @@
public final static String USER_AGENT_CONFIG = "user-agent";
/**
* Default value of <code>user-agent</code> configuration value.
- * <p>
- * Its value is @see org.apache.cocoon.Constants#COMPLETE_NAME.
- * </p>
*
+ * @see org.apache.cocoon.Constants#COMPLETE_NAME
* @since
*/
public final static String USER_AGENT_DEFAULT = Constants.COMPLETE_NAME;
@@ -203,10 +202,12 @@
* query-string appended to each crawling request.
* </p>
* <pre><tt>
- * <include>.*\.html?</exclude> or <exclude>.*\.html?, .*\.xsp</exclude>
+ * <include>.*\.html?</include> or <include>.*\.html?, .*\.xsp</include>
* <exclude>.*\.gif</exclude> or <exclude>.*\.gif, .*\.jpe?g</exclude>
* <link-content-type> application/x-cocoon-links </link-content-type>
* <link-view-query> ?cocoon-view=links </link-view-query>
+ * <user-agent> Cocoon </user-agent>
+ * <accept> text/xml </accept>
* </tt></pre>
*
* @param configuration XML configuration of this avalon component.
@@ -298,8 +299,9 @@
/* Create a reusable attributes for creating nodes */
this.attributes = new AttributesImpl();
- excludeCrawlingURL = new HashSet();
- this.setDefaultExcludeFromCrawling();
+ // already done in configure...
+ //excludeCrawlingURL = new HashSet();
+ //this.setDefaultExcludeFromCrawling();
}
/**
@@ -342,12 +344,12 @@
// remove it from the to-do list
linksToProcess.remove(link);
- URLConnection conn = processURL(url, link.getReferrer());
+ String new_url_link = processURL(url, link.getReferrer());
// calc all links from this url
- if (conn != null) {
+ if (new_url_link != null) {
- List url_links = getLinksFromConnection(conn, url);
+ List url_links = getLinksFromConnection(new_url_link, url);
if (url_links != null) {
// add links of this url to the to-do list
linksToProcess.addAll(url_links);
@@ -401,9 +403,22 @@
}
- protected List getLinksFromConnection(URLConnection conn, URL url) {
+ /**
+ * Retrieve a list of links of a url
+ *
+ * @param url_link_string url for requesting links, it is assumed that
+ * url_link_string queries the cocoon view links, ie of the form
+ * <code>http://host/foo/bar?cocoon-view=links</code>
+ * @param url_of_referrer base url of which links are requested, ie of the form
+ * <code>http://host/foo/bar</code>
+ * @return List of links from url_of_referrer, as result of requesting url
+ * url_link_string
+ */
+ protected List getLinksFromConnection(String url_link_string, URL url_of_referrer) {
List url_links = null;
try {
+ URL url_link = new URL( url_link_string );
+ URLConnection conn = url_link.openConnection();
String content_type = conn.getContentType();
if (getLogger().isDebugEnabled()) {
@@ -419,7 +434,7 @@
// content is supposed to be a list of links,
// relative to current URL
String line;
- String referrer = url.toString();
+ String referrer = url_of_referrer.toString();
while ((line = br.readLine()) != null) {
URL new_url = new URL(url, line);
@@ -459,15 +474,23 @@
return url_links;
}
- protected URLConnection processURL(URL url, String referrer) throws SAXException {
+ /**
+ * Generate xml attributes of a url, calculate url for retrieving links
+ *
+ * @param url to process
+ * @param referrer of the url
+ * @return String url for retrieving links, or null if url is an excluded-url,
+ * and not an included-url.
+ */
+ protected String processURL(URL url, String referrer) throws SAXException {
if (getLogger().isDebugEnabled()) {
getLogger().debug("getLinks URL " + url);
}
- URLConnection result = null;
+ String result = null;
- // don't try to investigate url which has been crawled already
+ // don't try to investigate a url which has been crawled already
if (crawled.contains(url.toString())) {
return null;
}
@@ -481,71 +504,38 @@
attributes.addAttribute("", REFERRER_ATTR_NAME,
REFERRER_ATTR_NAME, "CDATA", referrer);
- // don't try to get links for url which is excluded from crawling
- if (isExcludedURL(url.toString())) {
- // Check for status and output it.
-
- try {
- URLConnection links_url_connection = url.openConnection();
- HttpURLConnection h = (HttpURLConnection)links_url_connection;
- String content_type = links_url_connection.getContentType();
-
- attributes.addAttribute("", CONTENT_ATTR_NAME,
- CONTENT_ATTR_NAME, "CDATA",
- content_type);
-
- attributes.addAttribute("", MESSAGE_ATTR_NAME,
- MESSAGE_ATTR_NAME, "CDATA",
- h.getResponseMessage());
-
- attributes.addAttribute("", STATUS_ATTR_NAME,
- STATUS_ATTR_NAME, "CDATA",
- String.valueOf(h.getResponseCode()));
-
-
-
- }
- catch (IOException ioe)
- {
- attributes.addAttribute("", MESSAGE_ATTR_NAME,
- MESSAGE_ATTR_NAME, "CDATA",
- ioe.getMessage());
- }
-
- } else {
-
- // Output url, referrer, content-type, status, message for traversable url's
- // add prefix and query to get data from the linkserializer.
- try {
- URL links_url = new URL(url.toExternalForm()
- + ((url.toExternalForm().indexOf("?") == -1) ? "?" : "&")
- + linkViewQuery);
- URLConnection links_url_connection = links_url.openConnection();
- HttpURLConnection h = (HttpURLConnection)links_url_connection;
-
- result = links_url_connection;
-
- attributes.addAttribute("", CONTENT_ATTR_NAME,
- CONTENT_ATTR_NAME, "CDATA",
- links_url_connection.getContentType());
-
- attributes.addAttribute("", MESSAGE_ATTR_NAME,
- MESSAGE_ATTR_NAME, "CDATA",
- h.getResponseMessage());
-
- attributes.addAttribute("", STATUS_ATTR_NAME,
- STATUS_ATTR_NAME, "CDATA",
- String.valueOf(h.getResponseCode()));
- }
- catch(IOException ioe ) {
- // Output url referrer status message
- attributes.addAttribute("", MESSAGE_ATTR_NAME,
+ // Output url, referrer, content-type, status, message for traversable url's
+ try {
+ URLConnection links_url_connection = url.openConnection();
+ HttpURLConnection h = (HttpURLConnection)links_url_connection;
+ String content_type = links_url_connection.getContentType();
+
+ attributes.addAttribute("", CONTENT_ATTR_NAME,
+ CONTENT_ATTR_NAME, "CDATA",
+ content_type);
+
+ attributes.addAttribute("", MESSAGE_ATTR_NAME,
+ MESSAGE_ATTR_NAME, "CDATA",
+ h.getResponseMessage());
+
+ attributes.addAttribute("", STATUS_ATTR_NAME,
+ STATUS_ATTR_NAME, "CDATA",
+ String.valueOf(h.getResponseCode()));
+ } catch (IOException ioe) {
+ attributes.addAttribute("", MESSAGE_ATTR_NAME,
MESSAGE_ATTR_NAME, "CDATA",
ioe.getMessage());
-
-
- }
}
+
+ // don't try to get links of a url which is excluded from crawling
+ // try to get links of a url which is included for crawling
+ if (!isExcludedURL(url.toString()) && isIncludedURL( url.toString() )) {
+ // add prefix and query to get data from the linkserializer.
+ result = url.toExternalForm()
+ + ((url.toExternalForm().indexOf("?") == -1) ? "?" : "&")
+ + linkViewQuery;
+ }
+
super.contentHandler.startElement(URI, LINK_NODE_NAME, URI+':'+LINK_NODE_NAME, attributes);
super.contentHandler.endElement(URI, LINK_NODE_NAME, URI+':'+LINK_NODE_NAME);
@@ -623,7 +613,7 @@
super.recycle();
this.attributes = null;
- this.excludeCrawlingURL = null;
+ //this.excludeCrawlingURL = null;
}
}
----------------------------------------------------------------------
In case of troubles, e-mail: webmaster@xml.apache.org
To unsubscribe, e-mail: cocoon-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: cocoon-cvs-help@xml.apache.org