You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@cocoon.apache.org by st...@apache.org on 2001/12/12 00:00:25 UTC
cvs commit: xml-cocoon2/src/org/apache/cocoon/components/lucene CocoonCrawler.java CocoonErrorHandler.java IndexHelperField.java LuceneCocoonHelper.java LuceneCocoonIndexer.java LuceneCocoonPager.java LuceneCocoonSearcher.java LuceneIndexContentHandler.java LuceneXMLIndexer.java SimpleCocoonCrawlerImpl.java SimpleLuceneCocoonIndexerImpl.java SimpleLuceneCocoonSearcherImpl.java SimpleLuceneXMLIndexerImpl.java
stefano 01/12/11 15:00:25
Added: src/org/apache/cocoon/components/lucene CocoonCrawler.java
CocoonErrorHandler.java IndexHelperField.java
LuceneCocoonHelper.java LuceneCocoonIndexer.java
LuceneCocoonPager.java LuceneCocoonSearcher.java
LuceneIndexContentHandler.java
LuceneXMLIndexer.java SimpleCocoonCrawlerImpl.java
SimpleLuceneCocoonIndexerImpl.java
SimpleLuceneCocoonSearcherImpl.java
SimpleLuceneXMLIndexerImpl.java
Log:
adding the search components
Revision Changes Path
1.1 xml-cocoon2/src/org/apache/cocoon/components/lucene/CocoonCrawler.java
Index: CocoonCrawler.java
===================================================================
/*****************************************************************************
* Copyright (C) The Apache Software Foundation. All rights reserved. *
* ------------------------------------------------------------------------- *
* This software is published under the terms of the Apache Software License *
* version 1.1, a copy of which has been included with this distribution in *
* the LICENSE file. *
*****************************************************************************/
package org.apache.cocoon.components.lucene;
import org.apache.avalon.framework.component.Component;
import java.util.*;
import java.net.*;
/**
 * Component contract of a Cocoon crawler: crawling is started from a URL
 * and the crawled URLs are then obtained through an iterator.
 */
public interface CocoonCrawler extends Component {

    /** Role string of this component; it equals the fully qualified interface name. */
    String ROLE = "org.apache.cocoon.components.lucene.CocoonCrawler";

    /**
     * Starts crawling the given URL.
     *
     * @param url the URL at which crawling starts
     */
    void crawl( URL url );

    /**
     * Iterates over the crawled URLs.
     *
     * @return an iterator over the crawled URLs
     */
    Iterator iterator();
}
1.1 xml-cocoon2/src/org/apache/cocoon/components/lucene/CocoonErrorHandler.java
Index: CocoonErrorHandler.java
===================================================================
/*****************************************************************************
* Copyright (C) The Apache Software Foundation. All rights reserved. *
* ------------------------------------------------------------------------- *
* This software is published under the terms of the Apache Software License *
* version 1.1, a copy of which has been included with this distribution in *
* the LICENSE file. *
*****************************************************************************/
package org.apache.cocoon.components.lucene;
/**
* Title: lucene
* Description: Demo files using lucene indexer&searcher
* Copyright: Copyright (c) 2001
* Company:
* @author Bernhard Huber
* @version 1.0
*/
import org.xml.sax.SAXException;
import org.xml.sax.ErrorHandler;
import org.xml.sax.SAXParseException;
/**
 * SAX {@link ErrorHandler} that writes every warning, error and fatal error
 * reported by the parser as one line to <code>System.err</code>. None of
 * the callbacks rethrows the exception it receives.
 */
public class CocoonErrorHandler implements ErrorHandler {

    /** Text every reported line starts with. */
    private static final String LINE_PREFIX = "CocoonErrorHandler : ";

    /**
     * Describes where the problem was found.
     *
     * @param exception the exception handed over by the parser
     * @return public id, system id, column and line of the exception,
     *         followed by a single blank
     */
    private String getExceptionInfo(SAXParseException exception) {
        return "pubId: " + exception.getPublicId()
            + ", "
            + "sysId: " + exception.getSystemId()
            + ", "
            + "col: " + exception.getColumnNumber()
            + ", "
            + "line: " + exception.getLineNumber()
            + " ";
    }

    /**
     * Prints one line consisting of prefix, location, severity label and
     * exception message.
     *
     * @param severityLabel label such as <code>"error: "</code>
     * @param exception the exception handed over by the parser
     */
    private void report(String severityLabel, SAXParseException exception) {
        System.err.println(LINE_PREFIX
            + getExceptionInfo(exception)
            + severityLabel
            + exception.getMessage());
    }

    /** Reports a recoverable parser error. */
    public void error(SAXParseException exception) {
        report("error: ", exception);
    }

    /** Reports a non-recoverable parser error. */
    public void fatalError(SAXParseException exception) {
        report("fatalError: ", exception);
    }

    /** Reports a parser warning. */
    public void warning(SAXParseException exception) {
        report("warning: ", exception);
    }
}
1.1 xml-cocoon2/src/org/apache/cocoon/components/lucene/IndexHelperField.java
Index: IndexHelperField.java
===================================================================
/*****************************************************************************
* Copyright (C) The Apache Software Foundation. All rights reserved. *
* ------------------------------------------------------------------------- *
* This software is published under the terms of the Apache Software License *
* version 1.1, a copy of which has been included with this distribution in *
* the LICENSE file. *
*****************************************************************************/
package org.apache.cocoon.components.lucene;
/**
* Title: lucene
* Description: Demo files using lucene indexer&searcher
* Copyright: Copyright (c) 2001
* Company:
* @author Bernhard Huber
* @version 1.0
*/
import org.xml.sax.ContentHandler;
import org.xml.sax.ErrorHandler;
import org.xml.sax.Locator;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
import org.xml.sax.Attributes;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.DateField;
/**
 * Mutable holder for one XML element: its local name, its qualified name,
 * its attributes, and the text appended to it so far.
 */
class IndexHelperField {

    String localFieldName;
    String qualifiedFieldName;
    StringBuffer text;
    Attributes attributes;

    /**
     * Creates a holder with an empty text buffer.
     *
     * @param lfn local name of the element
     * @param qfn qualified name of the element
     * @param atts attributes of the element; the reference is kept as given
     */
    IndexHelperField(String lfn, String qfn, Attributes atts) {
        text = new StringBuffer();
        attributes = atts;
        qualifiedFieldName = qfn;
        localFieldName = lfn;
    }

    /** Appends <code>length</code> characters of <code>str</code>, starting at <code>offset</code>. */
    public void appendText(char[] str, int offset, int length) {
        StringBuffer buffer = this.text;
        buffer.append(str, offset, length);
    }

    /** Appends the given string to the collected text. */
    public void appendText(String text) {
        StringBuffer buffer = this.text;
        buffer.append(text);
    }

    /** @return the text collected so far */
    public String getText() {
        return this.text.toString();
    }

    /** @return the attributes passed to the constructor */
    public Attributes getAttributes() {
        return this.attributes;
    }

    /** @return the qualified name passed to the constructor */
    public String getQualifiedFieldName() {
        return this.qualifiedFieldName;
    }

    /** @return the local name passed to the constructor */
    public String getLocalFieldName() {
        return this.localFieldName;
    }
}
1.1 xml-cocoon2/src/org/apache/cocoon/components/lucene/LuceneCocoonHelper.java
Index: LuceneCocoonHelper.java
===================================================================
/*****************************************************************************
* Copyright (C) The Apache Software Foundation. All rights reserved. *
* ------------------------------------------------------------------------- *
* This software is published under the terms of the Apache Software License *
* version 1.1, a copy of which has been included with this distribution in *
* the LICENSE file. *
*****************************************************************************/
package org.apache.cocoon.components.lucene;
import org.apache.lucene.store.*;
import org.apache.lucene.index.*;
import org.apache.lucene.analysis.Analyzer;
import java.io.File;
import java.io.IOException;
/**
 * Static helper methods that create the Lucene directory, analyzer,
 * index reader and index writer objects.
 */
public class LuceneCocoonHelper {

    /**
     * Obtains a file-system based Lucene directory.
     *
     * @param directory the file-system location of the directory
     * @param create handed through to <code>FSDirectory.getDirectory</code>
     * @return the directory returned by <code>FSDirectory.getDirectory</code>
     * @throws IOException if <code>FSDirectory.getDirectory</code> fails
     */
    public static Directory getDirectory( File directory, boolean create ) throws IOException {
        return FSDirectory.getDirectory( directory, create );
    }

    /**
     * Instantiates an analyzer through its no-argument constructor.
     *
     * @param analyzer_class_name class name of the analyzer
     * @return the new analyzer, or <code>null</code> if loading, instantiating
     *         or casting the class failed for any reason
     */
    public static Analyzer getAnalyzer( String analyzer_class_name ) {
        try {
            return (Analyzer) Class.forName( analyzer_class_name ).newInstance();
        } catch (Exception e) {
            // every failure is reported to the caller as a null result
            return null;
        }
    }

    /**
     * Opens an index reader on a directory.
     *
     * @param directory the directory to read from
     * @return the reader returned by <code>IndexReader.open</code>
     * @throws IOException if opening the reader fails
     */
    public static IndexReader getIndexReader( Directory directory ) throws IOException {
        return IndexReader.open( directory );
    }

    /**
     * Creates an index writer.
     *
     * @param index the directory to write to
     * @param analyzer the analyzer handed to the writer
     * @param create handed through to the <code>IndexWriter</code> constructor
     * @return the new writer
     * @throws IOException if creating the writer fails
     */
    public static IndexWriter getIndexWriter( Directory index, Analyzer analyzer, boolean create ) throws IOException {
        return new IndexWriter( index, analyzer, create );
    }
}
1.1 xml-cocoon2/src/org/apache/cocoon/components/lucene/LuceneCocoonIndexer.java
Index: LuceneCocoonIndexer.java
===================================================================
/*****************************************************************************
* Copyright (C) The Apache Software Foundation. All rights reserved. *
* ------------------------------------------------------------------------- *
* This software is published under the terms of the Apache Software License *
* version 1.1, a copy of which has been included with this distribution in *
* the LICENSE file. *
*****************************************************************************/
package org.apache.cocoon.components.lucene;
import org.apache.avalon.framework.component.Component;
import org.apache.cocoon.ProcessingException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.store.Directory;
import java.net.URL;
/**
 * Component contract for indexing into a Lucene directory.
 */
public interface LuceneCocoonIndexer extends Component {

    /** Role string of this component; it equals the fully qualified interface name. */
    String ROLE = "org.apache.cocoon.components.lucene.LuceneCocoonIndexer";

    /**
     * Sets the analyzer of this indexer.
     *
     * @param analyzer the analyzer to use
     */
    void setAnalyzer( Analyzer analyzer );

    /**
     * Performs the indexing.
     *
     * @param index the Lucene directory holding the index
     * @param create presumably <code>true</code> to create a new index
     *        instead of updating an existing one; confirm against the
     *        implementation
     * @param base_url presumably the URL at which indexing starts; confirm
     *        against the implementation
     * @throws ProcessingException if indexing fails
     */
    void index( Directory index, boolean create, URL base_url )
        throws ProcessingException;
}
1.1 xml-cocoon2/src/org/apache/cocoon/components/lucene/LuceneCocoonPager.java
Index: LuceneCocoonPager.java
===================================================================
/*****************************************************************************
* Copyright (C) The Apache Software Foundation. All rights reserved. *
* ------------------------------------------------------------------------- *
* This software is published under the terms of the Apache Software License *
* version 1.1, a copy of which has been included with this distribution in *
* the LICENSE file. *
*****************************************************************************/
package org.apache.cocoon.components.lucene;
import org.apache.lucene.store.*;
import org.apache.lucene.index.*;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.Hits;
import org.apache.lucene.document.Document;
import java.io.File;
import java.io.IOException;
import java.util.*;
// implementation of ListIterator
/**
 * This class helps to manage the paging of hits.
 *
 * <p>
 * Each call of <code>next()</code> or <code>previous()</code> returns a
 * whole page: a <code>List</code> of at most
 * <code>getCountOfHitsPerPage()</code> {@link HitWrapper} objects.
 * The operations <code>add</code>, <code>remove</code> and <code>set</code>
 * are not supported.
 * </p>
 * <p>
 * A pager that has not been given any hits (no-argument constructor without
 * a later <code>setHits</code>) behaves like a pager over zero hits.
 * </p>
 */
public class LuceneCocoonPager implements ListIterator {

    public final static int COUNT_OF_HITS_PER_PAGE_DEFAULT = 5;
    public final static int HITS_INDEX_START_DEFAULT = 0;

    /**
     * hits to iterate upon, may be null as long as setHits was not called
     */
    private Hits hits;

    /**
     * current index of hit to return by next()
     */
    int hitsIndex = HITS_INDEX_START_DEFAULT;

    /**
     * maximum count of hits to return by next(), and previous()
     */
    int countOfHitsPerPage = COUNT_OF_HITS_PER_PAGE_DEFAULT;

    /**
     * Create a pager over the given hits, starting at the first hit.
     *
     * @param hits the hits to page through
     */
    public LuceneCocoonPager( Hits hits ) {
        setHits( hits );
    }

    /**
     * Create a pager without hits; use <code>setHits</code> to supply them.
     */
    public LuceneCocoonPager() {
    }

    /**
     * Set the hits to page through, and reset the current index to
     * <code>HITS_INDEX_START_DEFAULT</code>.
     *
     * @param hits the hits to page through
     */
    public void setHits( Hits hits ) {
        this.hits = hits;
        this.hitsIndex = HITS_INDEX_START_DEFAULT;
    }

    /**
     * Set count of hits displayed per single page
     */
    public void setCountOfHitsPerPage( int countOfHitsPerPage ) {
        this.countOfHitsPerPage = countOfHitsPerPage;
    }

    /**
     * Get count of hits displayed per single page
     */
    public int getCountOfHitsPerPage() {
        return this.countOfHitsPerPage;
    }

    /**
     * Calculate count of pages for displaying all hits
     *
     * @return the number of pages, a last partial page counts as a page;
     *         0 if the count of hits per page is not positive
     */
    public int getCountOfPages() {
        // a non-positive page size cannot hold any hit, and must not be used as divisor
        if (this.countOfHitsPerPage <= 0) {
            return 0;
        }
        final int total = hitCount();
        int count_of_pages = total / this.countOfHitsPerPage;
        if (total % this.countOfHitsPerPage != 0) {
            count_of_pages += 1;
        }
        return count_of_pages;
    }

    /**
     * Get starting index for retrieving hits
     */
    public int getStartIndex() {
        return this.hitsIndex;
    }

    /**
     * Set starting index for retrieving hits
     */
    public void setStartIndex( int start_index ) {
        this.hitsIndex = start_index;
    }

    /**
     * Inserts the specified element into the list (optional operation).
     *
     * @exception UnsupportedOperationException is always thrown
     */
    public void add(Object o) throws UnsupportedOperationException {
        throw new UnsupportedOperationException();
    }

    /**
     * Returns true if this list iterator has more elements when traversing
     * the list in the forward direction.
     */
    public boolean hasNext() {
        return hitsIndex < hitCount();
    }

    /**
     * Returns true if this list iterator has more elements when traversing
     * the list in the reverse direction, ie the current index is greater
     * than the count of hits per page.
     */
    public boolean hasPrevious() {
        return hitsIndex > countOfHitsPerPage;
    }

    /**
     * Returns the next page of hits, and advances the current index behind
     * the last hit returned.
     *
     * @return List of {@link HitWrapper} objects, never empty
     * @exception NoSuchElementException if there is no further hit, or
     *            if reading a hit fails
     */
    public Object next() {
        final int endIndex = Math.min( hitCount(), hitsIndex + countOfHitsPerPage );
        if (hitsIndex >= endIndex) {
            throw new NoSuchElementException();
        }
        ArrayList hitsPerPageList = new ArrayList();
        // hitsIndex is advanced hit by hit, thus it stays at the failing
        // hit if reading a hit fails
        while (hitsIndex < endIndex) {
            hitsPerPageList.add( wrapHit( hitsIndex ) );
            hitsIndex++;
        }
        return hitsPerPageList;
    }

    /**
     * Returns the index of the element that would be returned by a
     * subsequent call to next.
     */
    public int nextIndex() {
        return Math.min( hitsIndex, hitCount() );
    }

    /**
     * Returns the page of hits lying two pages before the current index,
     * and sets the current index behind the last hit returned.
     *
     * @return List of {@link HitWrapper} objects, never empty
     * @exception NoSuchElementException if there is no such page, or
     *            if reading a hit fails
     */
    public Object previous() {
        // NOTE(review): the range is calculated from the current index only,
        // thus after a short last page it is not aligned to a page start,
        // eg 12 hits, 5 hits per page, current index 12 yields hits 2..6.
        int startIndex = Math.max( 0, hitsIndex - 2 * countOfHitsPerPage );
        final int endIndex = Math.min( hitCount() - 1, hitsIndex - countOfHitsPerPage );
        if (startIndex >= endIndex) {
            throw new NoSuchElementException();
        }
        ArrayList hitsPerPageList = new ArrayList();
        while (startIndex < endIndex) {
            hitsPerPageList.add( wrapHit( startIndex ) );
            startIndex++;
        }
        hitsIndex = endIndex;
        return hitsPerPageList;
    }

    /**
     * Returns the index of the element that would be returned by a
     * subsequent call to previous.
     */
    public int previousIndex() {
        return Math.max( 0, hitsIndex - 2 * countOfHitsPerPage );
    }

    /**
     * Removes from the list the last element that was returned by next or
     * previous (optional operation).
     *
     * @exception UnsupportedOperationException is always thrown
     */
    public void remove() {
        throw new UnsupportedOperationException();
    }

    /**
     * Replaces the last element returned by next or previous with the
     * specified element (optional operation).
     *
     * @exception UnsupportedOperationException is always thrown
     */
    public void set(Object o) {
        throw new UnsupportedOperationException();
    }

    /**
     * Count of available hits, 0 if no hits have been set.
     */
    private int hitCount() {
        return hits == null ? 0 : hits.length();
    }

    /**
     * Wrap score and document of a single hit.
     *
     * @param index index of the hit
     * @return the wrapped hit
     * @exception NoSuchElementException if reading the hit fails, the
     *            IOException is attached as its cause
     */
    private HitWrapper wrapHit( int index ) {
        try {
            return new HitWrapper( hits.score( index ), hits.doc( index ) );
        } catch (IOException ioe) {
            NoSuchElementException nsee =
                new NoSuchElementException( "no more hits: " + ioe.getMessage() );
            nsee.initCause( ioe );
            throw nsee;
        }
    }

    /**
     * A helper class encapsulating found document, and its score
     */
    public static class HitWrapper {
        float score;
        Document document;

        /**
         * @param score score of the found document
         * @param document the found document
         */
        public HitWrapper( float score, Document document ) {
            this.document = document;
            this.score = score;
        }

        /** @return the found document */
        public Document getDocument() {
            return document;
        }

        /** @return the score of the found document */
        public float getScore() {
            return score;
        }

        /**
         * @param field name of a field of the found document
         * @return result of <code>Document.get</code> for that field
         */
        public String getField(String field) {
            return document.get(field);
        }
    }
}
1.1 xml-cocoon2/src/org/apache/cocoon/components/lucene/LuceneCocoonSearcher.java
Index: LuceneCocoonSearcher.java
===================================================================
/*****************************************************************************
* Copyright (C) The Apache Software Foundation. All rights reserved. *
* ------------------------------------------------------------------------- *
* This software is published under the terms of the Apache Software License *
* version 1.1, a copy of which has been included with this distribution in *
* the LICENSE file. *
*****************************************************************************/
package org.apache.cocoon.components.lucene;
import org.apache.avalon.framework.component.Component;
import org.apache.cocoon.ProcessingException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.search.Hits;
/**
 * Component contract for searching a Lucene directory.
 */
public interface LuceneCocoonSearcher extends Component {

    /** Role string of this component; it equals the fully qualified interface name. */
    String ROLE = "org.apache.cocoon.components.lucene.LuceneCocoonSearcher";

    /**
     * Sets the analyzer of this searcher.
     *
     * @param analyzer the analyzer to use
     */
    void setAnalyzer( Analyzer analyzer );

    /**
     * Sets the Lucene directory of this searcher.
     *
     * @param directory the directory to use
     */
    void setDirectory( Directory directory );

    /**
     * Runs a search.
     *
     * @param query_string the query
     * @param default_field presumably the field searched by query terms
     *        that name no field; confirm against the implementation
     * @return the hits of the search
     * @throws ProcessingException if searching fails
     */
    Hits search( String query_string, String default_field ) throws ProcessingException;
}
1.1 xml-cocoon2/src/org/apache/cocoon/components/lucene/LuceneIndexContentHandler.java
Index: LuceneIndexContentHandler.java
===================================================================
/*****************************************************************************
* Copyright (C) The Apache Software Foundation. All rights reserved. *
* ------------------------------------------------------------------------- *
* This software is published under the terms of the Apache Software License *
* version 1.1, a copy of which has been included with this distribution in *
* the LICENSE file. *
*****************************************************************************/
package org.apache.cocoon.components.lucene;
/**
* Title: lucene
* Description: Demo files using lucene indexer&searcher
* Copyright: Copyright (c) 2001
* Company:
* @author Bernhard Huber
* @version 1.0
*/
import org.xml.sax.ContentHandler;
import org.xml.sax.Locator;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.Attributes;
import org.xml.sax.helpers.AttributesImpl;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.DateField;
import java.util.Stack;
import java.util.Iterator;
import java.util.List;
import java.util.ArrayList;
/**
 * Parse XML and generate lucene document(s)
 *
 * <p>
 * One body document collects the character data of all elements in the
 * field <code>LuceneXMLIndexer.BODY_FIELD</code>; it is the first entry
 * of the document list. Furthermore each element having text, or attributes
 * yields a document of its own: the text is added as field named by the
 * qualified element name, each attribute as field named
 * <code>qname@attribute-qname</code>. All fields are created by
 * <code>Field.UnStored</code>.
 * </p>
 */
public class LuceneIndexContentHandler implements ContentHandler {

    /** all generated documents, the body document first */
    private List documents;
    /** character data of all elements seen so far */
    StringBuffer bodyText;
    private Document bodyDocument;
    /** stack of IndexHelperField objects, one per currently open element */
    private Stack elementStack;
    /** element nesting depth, used for indenting the trace output only */
    private int indentPos = 0;

    public LuceneIndexContentHandler() {
        this.bodyText = new StringBuffer();
        this.bodyDocument = new Document();
        this.documents = new ArrayList();
        this.documents.add( this.bodyDocument );
        elementStack = new Stack();
    }

    /**
     * @return List of all documents generated so far
     */
    public List allDocuments() {
        return documents;
    }

    /**
     * @return Iterator over all documents generated so far
     */
    public Iterator iterator() {
        return documents.iterator();
    }

    /**
     * Prefix of a trace line, depending on the current nesting depth.
     */
    private String indent() {
        final String LINE_PREFIX = " ";
        if (indentPos > LINE_PREFIX.length()) {
            return LINE_PREFIX;
        } else {
            return LINE_PREFIX.substring(0, indentPos);
        }
    }

    private void incIndent() {
        indentPos++;
    }

    private void decIndent() {
        indentPos--;
    }

    /**
     * Append character data to the text of the innermost open element, and
     * to the body text. Character data reported while no element is open is
     * ignored.
     */
    public void characters(char[] ch, int start, int length) {
        if (elementStack.isEmpty()) {
            // peek() would throw on an empty stack
            return;
        }
        IndexHelperField tos = (IndexHelperField) elementStack.peek();
        // chunks of a single character must be kept too, a parser is free
        // to split text into chunks of any size
        if (ch.length > 0 && start >= 0 && length > 0 && tos != null) {
            String text = new String( ch, start, length );
            tos.appendText( text );
            bodyText.append( text );
        }
    }

    /**
     * Add the collected body text as body field to the body document.
     */
    public void endDocument() {
        bodyDocument.add( Field.UnStored( LuceneXMLIndexer.BODY_FIELD, bodyText.toString()) );
    }

    /**
     * Generate a document for the element being closed, if it has text, or
     * attributes. Name, text, and attributes are taken from the entry
     * pushed by the matching startElement.
     */
    public void endElement(String namespaceURI, String localName, String qName) {
        //System.out.println(indent() + "ee: " + "localName " + localName + " " + "qName " + qName);
        IndexHelperField tos = (IndexHelperField) elementStack.pop();
        String text = tos.getText();
        String qname = tos.getQualifiedFieldName();

        Document document = new Document();
        boolean add_document = false;

        // NOTE(review): the println calls of this class trace to stdout,
        // consider using a logger instead
        if (text != null && text.length() > 0) {
            System.out.println( "field qname " + qname );
            document.add( Field.UnStored( qname, text ) );
            add_document = true;
        }

        Attributes atts = tos.getAttributes();
        if (atts != null && atts.getLength() > 0) {
            for (int i = 0; i < atts.getLength(); i++ ) {
                String atts_qname = atts.getQName(i);
                String atts_value = atts.getValue(i);
                System.out.println("attribute field " + qname + "@" + atts_qname + ": " + atts_value );
                document.add( Field.UnStored( qname + "@" + atts_qname, atts_value ) );
                add_document = true;
            }
        }

        if (add_document) {
            documents.add( document );
        }
        decIndent();
    }

    /** Traces the event only. */
    public void endPrefixMapping(String prefix) {
        System.out.println(indent() + "endPrefixMapping " + prefix );
    }

    /** Traces the event only. */
    public void ignorableWhitespace(char[] ch, int start, int length) {
        System.out.println(indent() + "ignorableWhitspace " );
    }

    /** Traces the event only. */
    public void processingInstruction(String target, String data) {
        System.out.println(indent() + "processingInstruction " + target + " " + data );
    }

    /** Traces the event only, the locator is not kept. */
    public void setDocumentLocator(Locator locator) {
        System.out.println(indent() + "startDocuementLocator " + locator );
    }

    /** Traces the event only. */
    public void skippedEntity(String name) {
        System.out.println(indent() + "skippedEntity " + name );
    }

    /** Traces the event only. */
    public void startDocument() {
        System.out.println(indent() + "startDocument" );
    }

    /**
     * Push an entry for the opened element. The attributes are copied, the
     * copy is evaluated by endElement.
     */
    public void startElement(String namespaceURI, String localName, String qName, Attributes atts) {
        incIndent();
        //System.out.println(indent() + "se: " + "localName " + localName + " " + "qName " + qName);
        IndexHelperField ihf = new IndexHelperField(localName, qName, new AttributesImpl(atts) );
        elementStack.push(ihf);
    }

    /** Traces the event only. */
    public void startPrefixMapping(String prefix, String uri) {
        System.out.println(indent() + "startPrefixMapping: " + prefix + " " + uri );
    }
}
1.1 xml-cocoon2/src/org/apache/cocoon/components/lucene/LuceneXMLIndexer.java
Index: LuceneXMLIndexer.java
===================================================================
/*****************************************************************************
* Copyright (C) The Apache Software Foundation. All rights reserved. *
* ------------------------------------------------------------------------- *
* This software is published under the terms of the Apache Software License *
* version 1.1, a copy of which has been included with this distribution in *
* the LICENSE file. *
*****************************************************************************/
package org.apache.cocoon.components.lucene;
import org.apache.avalon.framework.component.Component;
import org.apache.cocoon.ProcessingException;
import java.util.List;
import java.util.Iterator;
import java.net.URL;
/**
 * This interface specifies the generation of lucene documents from
 * xml content.
 *
 * <p>
 * The well-known fields of a lucene document are defined as
 * <code>*_FIELD</code> constants.
 * </p>
 * <p>
 * The generation of lucene documents is triggered via
 * <code>build()</code>.
 * </p>
 * <p>
 * The generated lucene documents are accessible via
 * <code>allDocuments()</code>, or <code>iterator()</code>.
 * </p>
 *
 */
public interface LuceneXMLIndexer extends Component {

    /** Role string of this component; it equals the fully qualified interface name. */
    String ROLE = "org.apache.cocoon.components.lucene.LuceneXMLIndexer";

    /** Field of document's body, ie <tt>body</tt> (mandatory).
     */
    String BODY_FIELD = "body";

    /** Field name <tt>url</tt>.
     */
    String URL_FIELD = "url";

    /** Field name <tt>uid</tt>.
     */
    String UID_FIELD = "uid";

    /**
     * Return a list of all lucene documents generated by {@link #build}.
     *
     * @return List list of lucene Documents
     */
    List allDocuments();

    /**
     * Return an iterator of all lucene documents generated by {@link #build}.
     *
     * @return Iterator iterator of lucene Documents
     */
    Iterator iterator();

    /**
     * Build lucene documents from a URL.
     *
     * @param url the content of this url gets indexed.
     * @throws ProcessingException if building the documents fails
     */
    void build(URL url) throws ProcessingException;
}
1.1 xml-cocoon2/src/org/apache/cocoon/components/lucene/SimpleCocoonCrawlerImpl.java
Index: SimpleCocoonCrawlerImpl.java
===================================================================
/*****************************************************************************
* Copyright (C) The Apache Software Foundation. All rights reserved. *
* ------------------------------------------------------------------------- *
* This software is published under the terms of the Apache Software License *
* version 1.1, a copy of which has been included with this distribution in *
* the LICENSE file. *
*****************************************************************************/
package org.apache.cocoon.components.lucene;
import org.apache.avalon.excalibur.pool.Recyclable;
import org.apache.avalon.framework.activity.Disposable;
import org.apache.avalon.framework.configuration.Configurable;
import org.apache.avalon.framework.configuration.Configuration;
import org.apache.avalon.framework.configuration.ConfigurationException;
import org.apache.avalon.framework.logger.AbstractLoggable;
import org.apache.avalon.framework.parameters.Parameters;
import org.apache.avalon.framework.thread.ThreadSafe;
import org.apache.cocoon.Constants;
import org.apache.cocoon.util.Tokenizer;
import org.apache.log.Logger;
import org.apache.regexp.RE;
import org.apache.regexp.RESyntaxException;
import java.io.*;
import java.util.*;
import java.net.*;
/**
* A simple cocoon crawler.
*/
public class SimpleCocoonCrawlerImpl extends AbstractLoggable
implements CocoonCrawler, Configurable, Disposable, Recyclable {
/**
* Append this query, for querying the link view of an URL
*/
private final static String LINK_VIEW_QUERY_CONFIG = "link-view-query";
private final static String LINK_VIEW_QUERY_DEFAULT = "?cocoon-view=links";
private String linkViewQuery = LINK_VIEW_QUERY_DEFAULT;
/**
* Expected content-type of a link view response.
*/
public final static String LINK_CONTENT_TYPE_CONFIG = "link-content-type";
public final String LINK_CONTENT_TYPE_DEFAULT = "application/x-cocoon-links";
private String linkContentType = LINK_CONTENT_TYPE_DEFAULT;
private final static String EXCLUDE_CONFIG = "exclude";
private HashSet excludeCrawlingURL;
private final static String INCLUDE_CONFIG = "include";
private HashSet includeCrawlingURL;
private final static String USER_AGENT_CONFIG = "user-agent";
private final static String USER_AGENT_DEFAULT = Constants.COMPLETE_NAME;
private String userAgent = USER_AGENT_DEFAULT;
private final static String ACCEPT_CONFIG = "accept";
private final static String ACCEPT_DEFAULT = USER_AGENT_DEFAULT;
private String accept = ACCEPT_DEFAULT;
private HashSet crawled;
private HashSet urlsToProcess;
/**
* configure component
* allow:
* <pre><tt>
* <include>.x</include> or <include>.xxx, .yyy</include>
* <exclude>.x</exclude> or <exclude>.xxx, .yyy</exclude>
* <link-content-type>dfd</link-content-type>
* <link-view-query>dfsd</link-view-query>
* </tt></pre>
*/
public void configure( Configuration configuration )
throws ConfigurationException {
Configuration []children;
children = configuration.getChildren( INCLUDE_CONFIG );
if (children != null) {
for (int i = 0; i < children.length; i++) {
String pattern = children[i].getValue();
try {
Tokenizer t = new Tokenizer( pattern, ", " );
while (t.hasMoreTokens()) {
String tokenized_pattern = t.nextToken();
this.includeCrawlingURL.add( new RE( tokenized_pattern ) );
}
} catch (RESyntaxException rese) {
getLogger().error( "Cannot create includeing regular-expression for " + pattern, rese );
}
}
}
children = configuration.getChildren( EXCLUDE_CONFIG );
if (children != null) {
for (int i = 0; i < children.length; i++) {
String pattern = children[i].getValue();
try {
Tokenizer t = new Tokenizer( pattern, ", " );
while (t.hasMoreTokens()) {
String tokenized_pattern = t.nextToken();
this.excludeCrawlingURL.add( new RE( tokenized_pattern ) );
}
} catch (RESyntaxException rese) {
getLogger().error( "Cannot create excluding regular-expression for " + pattern, rese );
}
}
}
Configuration child;
String value;
child = configuration.getChild( LINK_CONTENT_TYPE_CONFIG, false );
if (child != null) {
value = child.getValue();
if (value != null && value.length() > 0) {
this.linkContentType = value;
}
}
child = configuration.getChild( LINK_VIEW_QUERY_CONFIG, false );
if (child != null) {
value = child.getValue();
if (value != null && value.length() > 0) {
this.linkViewQuery = value;
}
}
child = configuration.getChild( USER_AGENT_CONFIG, false );
if (child != null) {
value = child.getValue();
if (value != null && value.length() > 0) {
this.userAgent = value;
}
}
child = configuration.getChild( ACCEPT_CONFIG, false );
if (child != null) {
value = child.getValue();
if (value != null && value.length() > 0) {
this.accept = value;
}
}
}
/**
* dispose at end of life cycle, releasing all resources.
*/
public void dispose() {
crawled = null;
urlsToProcess = null;
excludeCrawlingURL = null;
includeCrawlingURL = null;
}
/**
* recylcle this object, relasing resources
*/
public void recycle() {
crawled = null;
urlsToProcess = null;
}
public SimpleCocoonCrawlerImpl() {
includeCrawlingURL = null;
excludeCrawlingURL = new HashSet();
setImageExcludeFromCrawling();
}
private void setImageExcludeFromCrawling() {
String []EXCLUDE_FROM_CRAWLING_DEFAULT = {
"*.\\.gif$",
"*.\\.png$",
"*.\\.jpe?g$"
};
for (int i = 0; i < EXCLUDE_FROM_CRAWLING_DEFAULT.length; i++ ) {
String pattern = EXCLUDE_FROM_CRAWLING_DEFAULT[i];
try {
excludeCrawlingURL.add( new RE( pattern ) );
} catch (RESyntaxException rese) {
getLogger().error( "Cannot create excluding regular-expression for " + pattern, rese );
}
}
}
/**
* start crawling
*/
public void crawl( URL url ) {
crawled = new HashSet();
urlsToProcess = new HashSet();
urlsToProcess.add( url );
}
public Iterator iterator() {
return new CocoonCrawlerIterator(this);
}
public static class CocoonCrawlerIterator implements Iterator {
private SimpleCocoonCrawlerImpl cocoonCrawler;
CocoonCrawlerIterator( SimpleCocoonCrawlerImpl cocoonCrawler ) {
this.cocoonCrawler = cocoonCrawler;
}
/**
* check if crawling is finished.
*/
public boolean hasNext() {
return cocoonCrawler.urlsToProcess.size() > 0;
}
/**
* return the next URL
*/
public Object next() {
URL url = null;
Iterator i = cocoonCrawler.urlsToProcess.iterator();
if (i.hasNext()) {
url = (URL)i.next();
cocoonCrawler.urlsToProcess.remove( url );
List url_links = cocoonCrawler.getLinks( url );
if (url_links != null) {
cocoonCrawler.urlsToProcess.addAll( url_links );
}
}
return url;
}
/**
* remove is not implemented
* @exception UnsupportedOperationException is always thrown
*/
public void remove() {
throw new UnsupportedOperationException( "remove is not implemented" );
}
}
/**
* list of links from the parent url
*
* @param url
* @return List of URLs
*/
private List getLinks( URL url ) {
ArrayList url_links = null;
if (!isIncludedURL( url.toString())) {
return null;
}
// don't try to get links for url which is excluded
if (isExcludedURL( url.toString())) {
return null;
}
// don't try to get links for url which has been crawled already
if (crawled.contains( url.toString() )) {
return null;
}
// mark it as crawled
crawled.add( url.toString() );
if (getLogger().isDebugEnabled()) {
getLogger().debug( "Get links of URL: " + url.toString() );
}
// get links of url
try {
URL links_url = new URL( url, url.getPath() + linkViewQuery );
URLConnection links_url_connection = links_url.openConnection();
InputStream is = links_url_connection.getInputStream();
BufferedReader br = new BufferedReader( new InputStreamReader( is ) );
String content_type = links_url_connection.getContentType();
if (getLogger().isDebugEnabled()) {
getLogger().debug( "Content-type: " + content_type );
}
if (content_type.equals( linkContentType )) {
url_links = new ArrayList();
// content is supposed to be a list of links,
// relative to current URL
String line;
while ((line = br.readLine()) != null) {
URL new_url = new URL( url, line );
boolean add_url = true;
add_url &= !crawled.contains( new_url.toString() );
add_url &= isIncludedURL( new_url.toString() );
add_url &= !isExcludedURL( new_url.toString() );
if (add_url) {
if (getLogger().isDebugEnabled()) {
getLogger().debug( "Add URL: " + new_url.toString() );
}
url_links.add( new_url );
}
}
// now we have a list of URL which should be examined
}
} catch (IOException ioe) {
getLogger().warn( "Problems get links of " + url, ioe );
}
return url_links;
}
/**
* check if URL is a candidate for indexing
*/
private boolean isExcludedURL( String url ) {
// by default include URL for crawling
if (excludeCrawlingURL == null) {
return false;
}
final String s = url.toString();
Iterator i = excludeCrawlingURL.iterator();
while (i.hasNext()) {
RE pattern = (RE)i.next();
if (pattern.match( s )) {
return true;
}
}
return false;
}
/**
* check if URL is a candidate for indexing
*/
private boolean isIncludedURL( String url ) {
// by default include URL for crawling
if (excludeCrawlingURL == null) {
return true;
}
final String s = url.toString();
Iterator i = includeCrawlingURL.iterator();
while (i.hasNext()) {
RE pattern = (RE)i.next();
if (pattern.match( s )) {
return true;
}
}
return false;
}
}
1.1 xml-cocoon2/src/org/apache/cocoon/components/lucene/SimpleLuceneCocoonIndexerImpl.java
Index: SimpleLuceneCocoonIndexerImpl.java
===================================================================
/*****************************************************************************
* Copyright (C) The Apache Software Foundation. All rights reserved. *
* ------------------------------------------------------------------------- *
* This software is published under the terms of the Apache Software License *
* version 1.1, a copy of which has been included with this distribution in *
* the LICENSE file. *
*****************************************************************************/
package org.apache.cocoon.components.lucene;
import org.apache.avalon.excalibur.pool.Recyclable;
import org.apache.avalon.framework.activity.Disposable;
import org.apache.avalon.framework.component.ComponentException;
import org.apache.avalon.framework.component.ComponentManager;
import org.apache.avalon.framework.component.Composable;
import org.apache.avalon.framework.configuration.Configurable;
import org.apache.avalon.framework.configuration.Configuration;
import org.apache.avalon.framework.configuration.ConfigurationException;
import org.apache.cocoon.ProcessingException;
import org.apache.avalon.framework.logger.AbstractLoggable;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.DateField;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.index.*;
import java.io.IOException;
import java.util.Iterator;
import java.net.URL;
/**
 * A lucene indexer component: it crawls the links reachable from a base URL,
 * builds lucene documents from each crawled URL, and writes those documents
 * into a lucene index.
 */
public class SimpleLuceneCocoonIndexerImpl extends AbstractLoggable
    implements LuceneCocoonIndexer, Configurable, Composable, Disposable {

    /** Name of the configuration child holding the analyzer class name. */
    protected final static String ANALYZER_CLASSNAME_CONFIG = "analyzer-classname";
    /** Analyzer class name used if none is configured. */
    protected final static String ANALYZER_CLASSNAME_DEFAULT = "org.apache.lucene.analysis.standard.StandardAnalyzer";
    private String analyzerClassnameDefault = ANALYZER_CLASSNAME_DEFAULT;

    /** Name of the configuration child holding the index directory. */
    protected final static String DIRECTORY_CONFIG = "directory";
    /** Index directory used if none is configured. */
    protected final static String DIRECTORY_DEFAULT = null;
    private String directoryDefault = DIRECTORY_DEFAULT;

    /** Name of the configuration child holding the merge factor. */
    private final static String MERGE_FACTOR_CONFIG = "merge-factor";
    /** Merge factor used if none is configured. */
    private final static int MERGE_FACTOR_DEFAULT = 20;
    private int mergeFactor = MERGE_FACTOR_DEFAULT;

    /** The component manager instance */
    protected ComponentManager manager = null;

    /** The analyzer handed to the index writer, see {@link #setAnalyzer}. */
    Analyzer analyzer;

    /**
     * Read the optional <code>analyzer-classname</code>,
     * <code>directory</code>, and <code>merge-factor</code> child elements.
     *
     * @param conf the configuration of this component
     * @throws ConfigurationException declared by the
     *   <code>Configurable</code> contract
     */
    public void configure( Configuration conf ) throws ConfigurationException {
        Configuration child;
        String value;

        // NOTE: every value is read from the child element that was looked
        // up, not from conf itself; reading from conf would ignore the setting.
        child = conf.getChild( ANALYZER_CLASSNAME_CONFIG, false );
        if (child != null) {
            value = child.getValue( ANALYZER_CLASSNAME_DEFAULT );
            if (value != null) {
                analyzerClassnameDefault = value;
            }
        }
        child = conf.getChild( DIRECTORY_CONFIG, false );
        if (child != null) {
            value = child.getValue( DIRECTORY_DEFAULT );
            if (value != null) {
                directoryDefault = value;
            }
        }
        child = conf.getChild( MERGE_FACTOR_CONFIG, false );
        if (child != null) {
            mergeFactor = child.getValueAsInteger( MERGE_FACTOR_DEFAULT );
        }
    }

    /**
     * Set the current <code>ComponentManager</code> instance used by this
     * <code>Composable</code>.
     */
    public void compose(ComponentManager manager) throws ComponentException {
        this.manager=manager;
    }

    /**
     * Nothing is held between calls of {@link #index}, hence there is
     * nothing to release here.
     */
    public void dispose() {
    }

    /**
     * Set the analyzer used when writing the index.
     *
     * @param analyzer the analyzer passed to the index writer
     */
    public void setAnalyzer( Analyzer analyzer ) {
        this.analyzer = analyzer;
    }

    /**
     * index content of base_url, index content of links from base_url.
     *
     * @param index the lucene store to write the index to
     * @param create iff true create, or overwrite existing index, else
     *   update existing index.
     * @param base_url index content of base_url, and crawl through all its
     *   links recursivly.
     * @throws ProcessingException if a needed component cannot be looked up,
     *   or if writing, optimizing, or closing the index fails
     */
    public void index( Directory index, boolean create, URL base_url )
        throws ProcessingException {
        IndexWriter writer = null;
        LuceneXMLIndexer lxi = null;
        CocoonCrawler cocoonCrawler = null;
        try {
            lxi = (LuceneXMLIndexer)manager.lookup( LuceneXMLIndexer.ROLE );
            writer = new IndexWriter( index, analyzer, create );
            writer.mergeFactor = this.mergeFactor;

            cocoonCrawler = (CocoonCrawler)manager.lookup( CocoonCrawler.ROLE );
            cocoonCrawler.crawl( base_url );
            Iterator cocoonCrawlerIterator = cocoonCrawler.iterator();
            while (cocoonCrawlerIterator.hasNext()) {
                URL crawl_url = (URL)cocoonCrawlerIterator.next();
                if (!crawl_url.getHost().equals( base_url.getHost() ) ||
                    crawl_url.getPort() != base_url.getPort() ) {
                    // skip urls using different host, or port than host,
                    // or port of base url
                    // NOTE(review): this reports on stdout rather than via
                    // the component logger
                    System.out.println( "Skipping carwling URL " + crawl_url.toString() +
                        " as base_url is " + base_url.toString() );
                    continue;
                }
                // build lucene documents from the content of the crawl_url
                lxi.build( crawl_url );
                Iterator i = lxi.iterator();
                // add all built lucene documents
                while (i.hasNext()) {
                    Document document = (Document)i.next();
                    writer.addDocument( document );
                }
            }
            // optimize it
            writer.optimize();

            // Close on the success path inside the try block, so that a
            // failure to close the index is reported to the caller instead
            // of being silently dropped in the finally block below.
            IndexWriter completedWriter = writer;
            writer = null;
            completedWriter.close();
        } catch (IOException ioe) {
            throw new ProcessingException( "IOException in index()", ioe );
        } catch (ComponentException ce) {
            throw new ProcessingException( "ComponentException in index()", ce );
        } finally {
            if (writer != null) {
                // only reached while another exception is propagating;
                // a close failure must not hide that exception
                try {
                    writer.close();
                } catch (IOException ignored) {
                }
                writer = null;
            }
            if (lxi != null) {
                manager.release( lxi );
                lxi = null;
            }
            if (cocoonCrawler != null) {
                manager.release( cocoonCrawler );
                cocoonCrawler = null;
            }
        }
    }

    /**
     * Walks the <code>uid</code> terms of an existing index and deletes the
     * documents of the terms it is asked to remove.
     */
    class DocumentDeletableIterator {
        private IndexReader reader; // existing index
        private TermEnum uidIter; // document id iterator

        /**
         * Open the index and position the term enumeration at the first
         * term of the <code>uid</code> field.
         *
         * @param directory the lucene store of the existing index
         * @throws IOException if the index cannot be opened or read
         */
        public DocumentDeletableIterator( Directory directory ) throws IOException {
            reader = IndexReader.open( directory ); // open existing index
            uidIter = reader.terms( new Term("uid", "")); // init uid iterator
        }

        /**
         * Close the term enumeration and the index reader. Calling this
         * more than once is harmless.
         *
         * @throws IOException if closing the enumeration or the reader fails
         */
        public void close() throws IOException {
            try {
                if (uidIter != null) {
                    TermEnum openEnum = uidIter;
                    uidIter = null;
                    openEnum.close(); // close uid iterator
                }
            } finally {
                // the reader gets closed even if closing the enumeration failed
                if (reader != null) {
                    IndexReader openReader = reader;
                    reader = null;
                    openReader.close(); // close existing index
                }
            }
        }

        /**
         * Safety net closing the resources if {@link #close} has not been
         * called.
         */
        protected void finalize() throws Throwable {
            try {
                close();
            } finally {
                super.finalize();
            }
        }

        /**
         * Delete the documents of all remaining <code>uid</code> terms.
         *
         * @throws IOException if deleting or advancing the enumeration fails
         */
        public void deleteAllStaleDocuments() throws IOException {
            while (documentIsDeletable( uidIter.term() )) {
                reader.delete(uidIter.term());
                uidIter.next();
            }
        }

        /**
         * Delete the documents of all <code>uid</code> terms sorting before
         * <code>uid</code>; a following term equal to <code>uid</code> is
         * kept and stepped over.
         *
         * @param uid the uid of the current document
         * @throws IOException if deleting or advancing the enumeration fails
         */
        public void deleteModifiedDocuments( String uid ) throws IOException {
            while (documentHasBeenModified( uidIter.term(), uid )) {
                reader.delete( uidIter.term() );
                uidIter.next();
            }
            if (documentHasNotBeenModified( uidIter.term(), uid )) {
                uidIter.next();
            }
        }

        /**
         * @return true iff term is a term of the <code>uid</code> field
         */
        boolean documentIsDeletable( Term term ) {
            // compare by value; identity comparison only works as long as
            // the field name happens to be an interned string
            return term != null && "uid".equals( term.field() );
        }

        /**
         * @return true iff term is a uid term sorting before <code>uid</code>
         */
        boolean documentHasBeenModified( Term term, String uid ) {
            return documentIsDeletable( term ) &&
                term.text().compareTo(uid) < 0;
        }

        /**
         * @return true iff term is a uid term equal to <code>uid</code>
         */
        boolean documentHasNotBeenModified( Term term, String uid ) {
            return documentIsDeletable( term ) &&
                term.text().compareTo(uid) == 0;
        }
    }
}
1.1 xml-cocoon2/src/org/apache/cocoon/components/lucene/SimpleLuceneCocoonSearcherImpl.java
Index: SimpleLuceneCocoonSearcherImpl.java
===================================================================
/*****************************************************************************
* Copyright (C) The Apache Software Foundation. All rights reserved. *
* ------------------------------------------------------------------------- *
* This software is published under the terms of the Apache Software License *
* version 1.1, a copy of which has been included with this distribution in *
* the LICENSE file. *
*****************************************************************************/
package org.apache.cocoon.components.lucene;
import org.apache.avalon.excalibur.pool.Recyclable;
import org.apache.avalon.framework.activity.Disposable;
import org.apache.avalon.framework.component.ComponentException;
import org.apache.avalon.framework.component.ComponentManager;
import org.apache.avalon.framework.component.Composable;
import org.apache.avalon.framework.configuration.Configurable;
import org.apache.avalon.framework.configuration.Configuration;
import org.apache.avalon.framework.configuration.ConfigurationException;
import org.apache.cocoon.ProcessingException;
import org.apache.avalon.framework.logger.AbstractLoggable;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.DateField;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.index.*;
import org.apache.lucene.search.*;
import org.apache.lucene.queryParser.*;
import java.io.IOException;
import java.util.Iterator;
import java.net.URL;
/**
 * A lucene searcher component: it parses a query string and runs the
 * resulting query against the index of a lucene directory, reusing an
 * index reader between searches as long as the index has not changed.
 */
public class SimpleLuceneCocoonSearcherImpl extends AbstractLoggable
    implements LuceneCocoonSearcher, Configurable, Composable, Disposable, Recyclable {

    /** Name of the configuration child holding the analyzer class name. */
    protected final static String ANALYZER_CLASSNAME_CONFIG = "analyzer-classname";
    /** Analyzer class name used if none is configured. */
    protected final static String ANALYZER_CLASSNAME_DEFAULT = "org.apache.lucene.analysis.standard.StandardAnalyzer";
    private String analyzerClassnameDefault = ANALYZER_CLASSNAME_DEFAULT;

    /** Name of the configuration child holding the default search field. */
    protected final static String DEFAULT_SEARCH_FIELD_CONFIG = "default-search-field";
    /** Search field used if none is configured. */
    protected final static String DEFAULT_SEARCH_FIELD_DEFAULT = "body";
    private String defaultSearchFieldDefault = DEFAULT_SEARCH_FIELD_DEFAULT;

    /** Name of the configuration child holding the default query. */
    protected final static String DEFAULT_QUERY_CONFIG = "default-query";
    /** Query used if none is configured. */
    protected final static String DEFAULT_QUERY_DEFAULT = null;
    private String defaultQueryDefault = DEFAULT_QUERY_DEFAULT;

    /** Name of the configuration child holding the query parser class name. */
    protected final static String QUERYPARSER_CLASSNAME_CONFIG = "queryparser-classname";
    /** Query parser class name used if none is configured. */
    protected final static String QUERYPARSER_CLASSNAME_DEFAULT = "org.apache.lucene.queryParser.QueryParser";
    private String queryparserClassnameDefault = QUERYPARSER_CLASSNAME_DEFAULT;

    /** Name of the configuration child holding the index directory. */
    protected final static String DIRECTORY_CONFIG = "directory";
    /** Index directory used if none is configured. */
    protected final static String DIRECTORY_DEFAULT = null;
    private String directoryDefault = DIRECTORY_DEFAULT;

    /** The component manager instance */
    protected ComponentManager manager=null;

    private Analyzer analyzer;
    private Directory directory;
    private IndexSearcher indexSearcher;
    private IndexReaderCache indexReaderCache;

    /**
     * Read the optional <code>analyzer-classname</code>,
     * <code>default-search-field</code>, <code>default-query</code>,
     * <code>queryparser-classname</code>, and <code>directory</code>
     * child elements.
     *
     * @param conf the configuration of this component
     * @throws ConfigurationException declared by the
     *   <code>Configurable</code> contract
     */
    public void configure( Configuration conf ) throws ConfigurationException {
        String value;

        // NOTE: every value is read from the child element that was looked
        // up, not from conf itself; reading from conf would ignore the setting.
        value = configuredValue( conf, ANALYZER_CLASSNAME_CONFIG, ANALYZER_CLASSNAME_DEFAULT );
        if (value != null) {
            analyzerClassnameDefault = value;
        }
        value = configuredValue( conf, DEFAULT_SEARCH_FIELD_CONFIG, DEFAULT_SEARCH_FIELD_DEFAULT );
        if (value != null) {
            defaultSearchFieldDefault = value;
        }
        value = configuredValue( conf, DEFAULT_QUERY_CONFIG, DEFAULT_QUERY_DEFAULT );
        if (value != null) {
            defaultQueryDefault = value;
        }
        value = configuredValue( conf, QUERYPARSER_CLASSNAME_CONFIG, QUERYPARSER_CLASSNAME_DEFAULT );
        if (value != null) {
            queryparserClassnameDefault = value;
        }
        value = configuredValue( conf, DIRECTORY_CONFIG, DIRECTORY_DEFAULT );
        if (value != null) {
            directoryDefault = value;
        }
    }

    /**
     * Return the value of the child element <code>name</code> of
     * <code>conf</code>.
     *
     * @return the value of the child, <code>fallback</code> if the child
     *   has no value, or <code>null</code> if there is no such child
     */
    private String configuredValue( Configuration conf, String name, String fallback ) {
        Configuration child = conf.getChild( name, false );
        if (child == null) {
            return null;
        }
        return child.getValue( fallback );
    }

    /**
     * Set the current <code>ComponentManager</code> instance used by this
     * <code>Composable</code>.
     */
    public void compose(ComponentManager manager) throws ComponentException {
        this.manager=manager;
    }

    /**
     * Release the index searcher, and the cached index reader.
     */
    public void dispose() {
        releaseIndexSearcher();
        releaseIndexReaderCache();
    }

    /**
     * Release the index searcher; the cached index reader is kept.
     */
    public void recycle() {
        releaseIndexSearcher();
    }

    /**
     * Close, and forget the index searcher held in the field
     * <code>indexSearcher</code>, if any.
     */
    private void releaseIndexSearcher() {
        if (indexSearcher != null) {
            try {
                indexSearcher.close();
            } catch (IOException ioe) {
                // ignore it
            }
            indexSearcher = null;
        }
    }

    /**
     * Close the cached index reader, and forget the cache.
     */
    private void releaseIndexReaderCache() {
        if (indexReaderCache != null) {
            try {
                indexReaderCache.close();
            } catch (IOException ioe) {
                // ignore it, the cache is dropped anyway
            }
            indexReaderCache = null;
        }
    }

    /**
     * set an analyzer, overriding the analyzerClassnameDefault.
     */
    public void setAnalyzer( Analyzer analyzer ) {
        this.analyzer = analyzer;
    }

    /**
     * Set the lucene directory to search in. The index reader cache of a
     * previously set directory is dropped.
     *
     * @param directory the lucene store holding the index
     */
    public void setDirectory( Directory directory ) {
        this.directory = directory;
        indexReaderCache = null;
    }

    /**
     * Search the index of the current directory.
     *
     * @param query_string the query to parse; if <code>null</code> the
     *   configured default query is used
     * @param default_field the field searched by query terms naming no
     *   field; if <code>null</code> the configured default search field is
     *   used
     * @return the hits of the query
     * @throws ProcessingException if the query cannot be parsed, or the
     *   index cannot be read
     */
    public Hits search( String query_string, String default_field ) throws ProcessingException {
        Hits hits = null;

        // fall back to the configured defaults
        if (query_string == null) {
            query_string = defaultQueryDefault;
        }
        if (default_field == null) {
            default_field = defaultSearchFieldDefault;
        }

        try {
            Query query = QueryParser.parse( query_string, default_field, analyzer);
            // release index searcher for each new search
            releaseIndexSearcher();
            // NOTE(review): this local variable hides the field of the same
            // name, hence the field is never assigned and
            // releaseIndexSearcher() finds nothing to close. Assigning the
            // field would make the next search close this searcher, which
            // depending on the lucene version may close the reader shared
            // via IndexReaderCache too -- confirm before changing.
            IndexSearcher indexSearcher = new IndexSearcher( getReader() );
            hits = indexSearcher.search( query );
            // do not close indexSearcher now, as using hits needs an
            // opened indexSearcher indexSearcher.close();
        } catch (ParseException pe) {
            throw new ProcessingException( "Cannot parse query " + query_string, pe );
        } catch (IOException ioe) {
            throw new ProcessingException( "Cannot access hits", ioe );
        }
        return hits;
    }

    /**
     * This class should help to minimise usage of IndexReaders.
     * It keeps one index reader together with the last-modified value of
     * the directory at the time the reader was stored.
     */
    static class IndexReaderCache {
        private Directory directory;
        private IndexReader indexReader;
        private long lastModified;

        /**
         * @param directory the lucene store the cached reader belongs to
         */
        IndexReaderCache( Directory directory ) {
            this.directory = directory;
        }

        /**
         * Close, and forget the cached index reader, if any.
         *
         * @throws IOException if closing the reader fails
         */
        void close() throws IOException {
            if (indexReader != null) {
                IndexReader openReader = indexReader;
                indexReader = null;
                openReader.close();
            }
        }

        /**
         * Safety net closing the cached reader if {@link #close} has not
         * been called.
         */
        protected void finalize() throws Throwable {
            try {
                close();
            } finally {
                super.finalize();
            }
        }

        /**
         * Store a reader, closing the reader stored before, and remember
         * the current last-modified value of the directory.
         *
         * @param reader the reader to cache
         * @throws IOException if closing the old reader, or reading the
         *   last-modified value fails
         */
        void setIndexReader( IndexReader reader ) throws IOException {
            // never close the reader which is about to be cached
            if (indexReader != null && indexReader != reader) {
                indexReader.close();
            }
            indexReader = reader;
            lastModified = indexReader.lastModified(this.directory);
        }

        /**
         * @return the cached reader, or <code>null</code> if there is none
         */
        IndexReader getIndexReader() {
            return indexReader;
        }

        /**
         * @return true iff a reader is cached, and the last-modified value
         *   of the directory equals the value remembered by
         *   {@link #setIndexReader}
         * @throws IOException if reading the last-modified value fails
         */
        boolean indexReaderIsValid() throws IOException {
            return indexReader != null &&
                indexReader.lastModified(this.directory) == lastModified;
        }
    }

    /**
     * Return an index reader of the current directory, reusing the cached
     * reader while it is valid, else opening, and caching a new one.
     *
     * @return an index reader of the current directory
     * @throws IOException if no directory has been set, or the index cannot
     *   be opened
     */
    public IndexReader getReader() throws IOException {
        if (directory == null) {
            // fail with a clear message instead of a NullPointerException
            throw new IOException( "No index directory has been set" );
        }
        if (indexReaderCache == null) {
            indexReaderCache = new IndexReaderCache( directory );
        }
        IndexReader indexReader = null;
        if (indexReaderCache.indexReaderIsValid()) {
            indexReader = indexReaderCache.getIndexReader();
        } else {
            indexReader = IndexReader.open( this.directory );
            indexReaderCache.setIndexReader( indexReader );
        }
        return indexReader;
    }
}
1.1 xml-cocoon2/src/org/apache/cocoon/components/lucene/SimpleLuceneXMLIndexerImpl.java
Index: SimpleLuceneXMLIndexerImpl.java
===================================================================
/*****************************************************************************
* Copyright (C) The Apache Software Foundation. All rights reserved. *
* ------------------------------------------------------------------------- *
* This software is published under the terms of the Apache Software License *
* version 1.1, a copy of which has been included with this distribution in *
* the LICENSE file. *
*****************************************************************************/
package org.apache.cocoon.components.lucene;
import org.apache.avalon.framework.activity.Disposable;
import org.apache.avalon.framework.configuration.Configurable;
import org.apache.avalon.framework.configuration.Configuration;
import org.apache.avalon.framework.configuration.ConfigurationException;
import org.apache.avalon.framework.component.ComponentException;
import org.apache.avalon.framework.component.ComponentManager;
import org.apache.avalon.framework.component.Composable;
import org.apache.avalon.framework.logger.AbstractLoggable;
import org.apache.avalon.framework.parameters.Parameters;
import org.apache.avalon.framework.thread.ThreadSafe;
import org.apache.avalon.framework.logger.AbstractLoggable;
import org.apache.cocoon.components.parser.Parser;
import org.apache.cocoon.ProcessingException;
import org.apache.cocoon.environment.Source;
import org.apache.cocoon.environment.SourceResolver;
import java.io.*;
import java.util.HashSet;
import java.util.Map;
import java.util.Iterator;
import java.util.List;
import java.util.ArrayList;
import java.net.URL;
import java.net.URLConnection;
import javax.xml.parsers.*;
import org.xml.sax.ContentHandler;
import org.xml.sax.ErrorHandler;
import org.xml.sax.Locator;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
import org.xml.sax.Attributes;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.DateField;
/**
 * A simple class building lucene documents from xml content.
 * The xml content is fetched from the content view of a URL, and parsed
 * by a <code>LuceneIndexContentHandler</code>.
 */
public class SimpleLuceneXMLIndexerImpl extends AbstractLoggable
    implements LuceneXMLIndexer, Configurable, Composable {

    /**
     * list of lucene Document objects built by the last call of
     * {@link #build}, or <code>null</code> if that call built none
     */
    List documents;

    /**
     * The component manager instance
     */
    protected ComponentManager manager=null;

    /**
     * append this string to the url in order to get the
     * content view of the url
     */
    final String CONTENT_QUERY = "?cocoon-view=content";

    /**
     * set of allowed content types
     */
    final HashSet allowedContentType;

    /**
     * Create an indexer accepting the content types
     * <code>text/xml</code>, and <code>text/xhtml</code>.
     */
    public SimpleLuceneXMLIndexerImpl() {
        documents = null;
        allowedContentType = new HashSet();
        allowedContentType.add( "text/xml" );
        allowedContentType.add( "text/xhtml" );
    }

    /**
     * configure; no configuration values are read.
     */
    public void configure(Configuration conf) throws ConfigurationException {
    }

    /**
     * Set the current <code>ComponentManager</code> instance used by this
     * <code>Composable</code>.
     */
    public void compose(ComponentManager manager) throws ComponentException {
        this.manager = manager;
    }

    /**
     * return a list of all lucene documents generated by @see build
     *
     * @return List list of lucene Documents, or <code>null</code> if the
     *   last build generated none
     */
    public List allDocuments() {
        return documents;
    }

    /**
     * return an iterator of all lucene documents generated by @see build
     *
     * @return Iterator iterator of lucene Documents; an empty iterator if
     *   the last build generated none
     */
    public Iterator iterator() {
        if (documents == null) {
            return new ArrayList().iterator();
        }
        return documents.iterator();
    }

    /**
     * Build lucene documents from a URL. The documents are available by
     * {@link #allDocuments}, and {@link #iterator} afterwards. If the
     * content type of the URL is not an allowed one no documents are built.
     *
     * @param url the content of this url gets indexed.
     * @throws ProcessingException if the URL cannot be read, or its content
     *   cannot be parsed
     */
    public void build(URL url)
        throws ProcessingException {
        // Forget the documents of a previous build. Otherwise a URL which
        // is skipped, or which fails, would hand out the documents of the
        // URL built before once more.
        documents = null;
        try {
            URL contentURL = new URL( url, contentViewFile( url ) );
            URLConnection contentURLConnection = contentURL.openConnection();
            String contentType = mediaType( contentURLConnection.getContentType() );
            if (contentType != null &&
                allowedContentType.contains( contentType )) {
                LuceneIndexContentHandler luceneIndexContentHandler = new LuceneIndexContentHandler();
                indexDocument( contentURLConnection, luceneIndexContentHandler );
                //
                // document is parsed
                //
                final String documentUid = uid( contentURLConnection );
                Iterator it = luceneIndexContentHandler.iterator();
                while (it.hasNext()) {
                    Document d = (Document)it.next();
                    d.add(Field.UnIndexed( URL_FIELD, url.toString()));
                    // store ... false, index ... true, token ... false
                    d.add(new Field( UID_FIELD, documentUid, false, true, false));
                }
                documents = luceneIndexContentHandler.allDocuments();
            }
        } catch (IOException ioe) {
            throw new ProcessingException( "Cannot read URL " + url, ioe );
        }
    }

    /**
     * Return the file part of <code>url</code> extended by the content
     * view query. A query already present in <code>url</code> is kept.
     *
     * @param url the url to get the content view of
     * @return path, and query of the content view url
     */
    private String contentViewFile( URL url ) {
        final String file = url.getFile();
        if (file.indexOf( '?' ) == -1) {
            return file + CONTENT_QUERY;
        }
        // there is a query already, append the view as further parameter
        return file + "&" + CONTENT_QUERY.substring( 1 );
    }

    /**
     * Strip parameters like <code>; charset=UTF-8</code> from a content
     * type value.
     *
     * @param contentType a content type value, may be <code>null</code>
     * @return the content type without parameters, or <code>null</code>
     *   if <code>contentType</code> is <code>null</code>
     */
    private String mediaType( String contentType ) {
        if (contentType == null) {
            return null;
        }
        final int parameterStart = contentType.indexOf( ';' );
        if (parameterStart != -1) {
            contentType = contentType.substring( 0, parameterStart );
        }
        return contentType.trim();
    }

    /**
     * index input stream producing lucene Documents
     *
     * @param contentURLConnection the xml content which should get indexed.
     * @param luceneIndexContentHandler ContentHandler for generating
     *   a lucene Document from XML content.
     * @throws ProcessingException if the content cannot be read, or parsed,
     *   or if no xml parser can be looked up
     */
    private void indexDocument( URLConnection contentURLConnection,
        LuceneIndexContentHandler luceneIndexContentHandler )
        throws ProcessingException {
        InputStream is = null;
        InputSource in = null;
        Parser parser = null;
        try {
            is = contentURLConnection.getInputStream();
            in = new InputSource(is);
            // get an XML parser
            parser = (Parser)this.manager.lookup(Parser.ROLE);
            //reader.setErrorHandler(new CocoonErrorHandler());
            parser.setContentHandler( luceneIndexContentHandler );
            parser.parse(in);
            //
            // document is parsed
            //
        } catch (IOException ioe) {
            throw new ProcessingException( "Cannot read!", ioe );
        } catch (SAXException saxe) {
            throw new ProcessingException("Cannot parse!", saxe);
        } catch (ComponentException ce) {
            throw new ProcessingException( "Cannot lookup xml parser!", ce );
        } finally {
            // close the stream of the connection, it is not needed anymore
            if (is != null) {
                try {
                    is.close();
                } catch (IOException ignored) {
                    // the content has been consumed, or an exception is
                    // propagating already
                }
            }
            if (parser != null) this.manager.release(parser);
        }
    }

    /**
     * return a unique uid of a url connection
     *
     * @param urlConnection the connection to build the uid for
     * @return String unique uid of a urlConnection
     */
    private String uid( URLConnection urlConnection ) {
        // Append path and date into a string in such a way that lexicographic
        // sorting gives the same results as a walk of the file hierarchy. Thus
        // null (\u0000) is used both to separate directory components and to
        // separate the path from the date.
        // NOTE(review): the string form of the connection object, not of
        // its URL, is used as path part; kept as is, as changing it would
        // change the uid values.
        return urlConnection.toString().replace('/', '\u0000') +
            "\u0000" +
            DateField.timeToString(urlConnection.getLastModified());
    }
}
----------------------------------------------------------------------
In case of troubles, e-mail: webmaster@xml.apache.org
To unsubscribe, e-mail: cocoon-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: cocoon-cvs-help@xml.apache.org