You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@cocoon.apache.org by fe...@apache.org on 2007/07/07 22:01:52 UTC
svn commit: r554247 - in
/cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main:
java/org/apache/cocoon/transformation/LuceneIndexTransformer.java
resources/META-INF/cocoon/spring/cocoon-lucene.xml
Author: felixk
Date: Sat Jul 7 13:01:50 2007
New Revision: 554247
URL: http://svn.apache.org/viewvc?view=rev&rev=554247
Log:
Also springify the Lucene index transformer, which was missed previously.
Modified:
cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/java/org/apache/cocoon/transformation/LuceneIndexTransformer.java
cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/resources/META-INF/cocoon/spring/cocoon-lucene.xml
Modified: cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/java/org/apache/cocoon/transformation/LuceneIndexTransformer.java
URL: http://svn.apache.org/viewvc/cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/java/org/apache/cocoon/transformation/LuceneIndexTransformer.java?view=diff&rev=554247&r1=554246&r2=554247
==============================================================================
--- cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/java/org/apache/cocoon/transformation/LuceneIndexTransformer.java (original)
+++ cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/java/org/apache/cocoon/transformation/LuceneIndexTransformer.java Sat Jul 7 13:01:50 2007
@@ -22,31 +22,25 @@
import java.util.Map;
import java.util.Stack;
-import org.apache.avalon.framework.configuration.Configurable;
-import org.apache.avalon.framework.configuration.Configuration;
-import org.apache.avalon.framework.configuration.ConfigurationException;
import org.apache.avalon.framework.parameters.Parameters;
-import org.apache.avalon.framework.service.ServiceException;
-import org.apache.avalon.framework.service.ServiceManager;
-import org.apache.avalon.framework.service.Serviceable;
-
import org.apache.cocoon.ProcessingException;
import org.apache.cocoon.caching.CacheableProcessingComponent;
import org.apache.cocoon.components.search.LuceneCocoonHelper;
import org.apache.cocoon.components.search.LuceneXMLIndexer;
import org.apache.cocoon.configuration.Settings;
import org.apache.cocoon.environment.SourceResolver;
+import org.apache.cocoon.spring.configurator.WebAppContextUtils;
import org.apache.commons.lang.BooleanUtils;
import org.apache.excalibur.source.SourceValidity;
import org.apache.excalibur.source.impl.validity.NOPValidity;
-
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
-import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
+import org.springframework.beans.factory.InitializingBean;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;
@@ -109,8 +103,8 @@
*
* @version $Id$
*/
-public class LuceneIndexTransformer extends AbstractTransformer
- implements CacheableProcessingComponent, Configurable, Serviceable {
+public class LuceneIndexTransformer extends AbstractTransformer implements CacheableProcessingComponent,
+ InitializingBean {
public static final String ANALYZER_CLASSNAME_CONFIG = "analyzer-classname";
public static final String ANALYZER_CLASSNAME_PARAMETER = "analyzer-classname";
@@ -141,15 +135,16 @@
// The 3 states of the state machine
private static final int STATE_GROUND = 0; // initial or "ground" state
- private static final int STATE_QUERY = 1; // processing a lucene:index (Query) element
- private static final int STATE_DOCUMENT = 2; // processing a lucene:document element
+ private static final int STATE_QUERY = 1; // processing a lucene:index
+ // (Query) element
+ private static final int STATE_DOCUMENT = 2; // processing a
+ // lucene:document element
- // Initialization time variables
- protected File workDir = null;
-
- // Declaration time parameters values (specified in sitemap component config)
+ // Declaration time parameters values (specified in sitemap component
+ // config)
private IndexerConfiguration configureConfiguration;
- // Invocation time parameters values (specified in sitemap transform parameters)
+ // Invocation time parameters values (specified in sitemap transform
+ // parameters)
private IndexerConfiguration setupConfiguration;
// Parameters specified in the input document
private IndexerConfiguration queryConfiguration;
@@ -163,15 +158,42 @@
private String bodyDocumentURL;
private Stack elementStack = new Stack();
/**
- * Storage for the document element's attributes until the document
- * has been indexed, so that they can be copied to the output
- * along with a boolean <code>indexed</code> attribute.
+ * Storage for the document element's attributes until the document has been
+ * indexed, so that they can be copied to the output along with a boolean
+ * <code>indexed</code> attribute.
*/
- private AttributesImpl documentAttributes;
+ private AttributesImpl documentAttributes;
private long documentStartTime;
+ /**
+ * Class name of the Lucene text analyzer to use. Typically depends on the
+ * language of the text being indexed. See the Lucene documentation for more
+ * information.
+ */
+ private String analyzer = ANALYZER_CLASSNAME_DEFAULT;
+
+ /**
+ * Location of directory where index files are stored. This path is relative
+ * to the Cocoon work directory
+ */
+ private String directory = DIRECTORY_DEFAULT;
+
+ /**
+ * Determines how often segment indices are merged. See the Lucene
+ * documentation for more information.
+ */
+ private int mergeFactor = MERGE_FACTOR_DEFAULT;
+
+ /**
+ * Maximum number of terms to index in a field (as far as the index is
+ * concerned, the document will effectively be truncated at this point). The
+ * default value, 10k, may not be sufficient for large documents.
+ */
+ private int maxFieldLength = MAX_FIELD_LENGTH_DEFAULT;
+
private static String uid(String url) {
- return url.replace('/', '\u0000'); // + "\u0000" + DateField.timeToString(urlConnection.getLastModified());
+ return url.replace('/', '\u0000'); // + "\u0000" +
+ // DateField.timeToString(urlConnection.getLastModified());
}
/**
@@ -180,43 +202,27 @@
* parameters in the sitemap pipeline, or by attributes of the query
* element(s) in the XML input document.
*/
- public void configure(Configuration conf) throws ConfigurationException {
- this.configureConfiguration = new IndexerConfiguration(
- conf.getChild(ANALYZER_CLASSNAME_CONFIG).getValue(ANALYZER_CLASSNAME_DEFAULT),
- conf.getChild(DIRECTORY_CONFIG).getValue(DIRECTORY_DEFAULT),
- conf.getChild(MERGE_FACTOR_CONFIG).getValueAsInteger(MERGE_FACTOR_DEFAULT),
- conf.getChild(MAX_FIELD_LENGTH_CONFIG).getValueAsInteger(MAX_FIELD_LENGTH_DEFAULT)
- );
+ public void afterPropertiesSet() throws IllegalArgumentException {
+ this.configureConfiguration = new IndexerConfiguration(getAnalyzer(), getDirectory(), getMergeFactor(),
+ getMaxFieldLength());
}
/**
- * Setup the transformer.
- * Called when the pipeline is assembled.
- * The parameters are those specified as child elements of the
- * <code><map:transform></code> element in the sitemap.
- * These parameters are optional:
- * If no parameters are specified here then the defaults are
- * supplied by the component configuration.
- * Any parameters specified here may be over-ridden by attributes
- * of the lucene:index element in the input document.
+ * Setup the transformer. Called when the pipeline is assembled. The
+ * parameters are those specified as child elements of the
+ * <code><map:transform></code> element in the sitemap. These
+ * parameters are optional: If no parameters are specified here then the
+ * defaults are supplied by the component configuration. Any parameters
+ * specified here may be over-ridden by attributes of the lucene:index
+ * element in the input document.
*/
public void setup(SourceResolver resolver, Map objectModel, String src, Parameters parameters)
- throws ProcessingException, SAXException, IOException {
- setupConfiguration = new IndexerConfiguration(
- parameters.getParameter(ANALYZER_CLASSNAME_PARAMETER, configureConfiguration.analyzerClassname),
- parameters.getParameter(DIRECTORY_PARAMETER, configureConfiguration.indexDirectory),
- parameters.getParameterAsInteger(MERGE_FACTOR_PARAMETER, configureConfiguration.mergeFactor),
- parameters.getParameterAsInteger(MAX_FIELD_LENGTH_PARAMETER, configureConfiguration.maxFieldLength)
- );
- }
-
- /**
- * @see org.apache.avalon.framework.service.Serviceable#service(org.apache.avalon.framework.service.ServiceManager)
- */
- public void service(ServiceManager manager) throws ServiceException {
- final Settings settings = (Settings)manager.lookup(Settings.ROLE);
- this.workDir = new File(settings.getWorkDirectory());
- manager.release(settings);
+ throws ProcessingException, SAXException, IOException {
+ setupConfiguration = new IndexerConfiguration(parameters.getParameter(ANALYZER_CLASSNAME_PARAMETER,
+ configureConfiguration.analyzerClassname), parameters.getParameter(DIRECTORY_PARAMETER,
+ configureConfiguration.indexDirectory), parameters.getParameterAsInteger(MERGE_FACTOR_PARAMETER,
+ configureConfiguration.indexerMergeFactor), parameters.getParameterAsInteger(
+ MAX_FIELD_LENGTH_PARAMETER, configureConfiguration.indexerMaxFieldLength));
}
/**
@@ -225,7 +231,10 @@
public void recycle() {
this.processing = STATE_GROUND;
if (this.writer != null) {
- try { this.writer.close(); } catch (IOException ioe) { }
+ try {
+ this.writer.close();
+ } catch (IOException ioe) {
+ }
this.writer = null;
}
this.bodyText = null;
@@ -236,9 +245,9 @@
}
/**
- * Generate the unique key.
- * This key must be unique inside the space of this component.
- *
+ * Generate the unique key. This key must be unique inside the space of this
+ * component.
+ *
* @return The generated key
*/
public Serializable getKey() {
@@ -247,7 +256,7 @@
/**
* Generate the validity object.
- *
+ *
* @return The generated validity object or <code>null</code> if the
* component is currently not cacheable.
*/
@@ -265,20 +274,23 @@
/**
* Begin the scope of a prefix-URI Namespace mapping.
- *
- * @param prefix The Namespace prefix being declared.
- * @param uri The Namespace URI the prefix is mapped to.
+ *
+ * @param prefix
+ * The Namespace prefix being declared.
+ * @param uri
+ * The Namespace URI the prefix is mapped to.
*/
public void startPrefixMapping(String prefix, String uri) throws SAXException {
if (processing == STATE_GROUND) {
- super.startPrefixMapping(prefix,uri);
+ super.startPrefixMapping(prefix, uri);
}
}
/**
* End the scope of a prefix-URI mapping.
- *
- * @param prefix The prefix that was being mapping.
+ *
+ * @param prefix
+ * The prefix that was being mapped.
*/
public void endPrefixMapping(String prefix) throws SAXException {
if (processing == STATE_GROUND) {
@@ -286,28 +298,27 @@
}
}
- public void startElement(String namespaceURI, String localName, String qName, Attributes atts)
- throws SAXException {
+ public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException {
if (processing == STATE_GROUND) {
- if (LUCENE_URI.equals(namespaceURI) && LUCENE_QUERY_ELEMENT.equals(localName)){
+ if (LUCENE_URI.equals(namespaceURI) && LUCENE_QUERY_ELEMENT.equals(localName)) {
String sCreate = atts.getValue(LUCENE_QUERY_CREATE_ATTRIBUTE);
createIndex = BooleanUtils.toBoolean(sCreate);
String analyzerClassname = atts.getValue(LUCENE_QUERY_ANALYZER_ATTRIBUTE);
- String indexDirectory = atts.getValue(LUCENE_QUERY_DIRECTORY_ATTRIBUTE);
- String mergeFactor = atts.getValue(LUCENE_QUERY_MERGE_FACTOR_ATTRIBUTE);
- String maxFieldLength = atts.getValue(LUCENE_QUERY_MAX_FIELD_LENGTH_ATTRIBUTE);
-
- queryConfiguration = new IndexerConfiguration(
- analyzerClassname != null ? analyzerClassname : setupConfiguration.analyzerClassname,
- indexDirectory != null ? indexDirectory : setupConfiguration.indexDirectory,
- mergeFactor != null ? Integer.parseInt(mergeFactor) : setupConfiguration.mergeFactor,
- maxFieldLength != null ? Integer.parseInt(maxFieldLength) : setupConfiguration.maxFieldLength
- );
+ String indexDirectory = atts.getValue(LUCENE_QUERY_DIRECTORY_ATTRIBUTE);
+ String mergeFactorStr = atts.getValue(LUCENE_QUERY_MERGE_FACTOR_ATTRIBUTE);
+ String maxFieldLengthStr = atts.getValue(LUCENE_QUERY_MAX_FIELD_LENGTH_ATTRIBUTE);
+
+ queryConfiguration = new IndexerConfiguration(analyzerClassname != null ? analyzerClassname
+ : setupConfiguration.analyzerClassname, indexDirectory != null ? indexDirectory
+ : setupConfiguration.indexDirectory, mergeFactorStr != null ? Integer.parseInt(mergeFactorStr)
+ : setupConfiguration.indexerMergeFactor, maxFieldLengthStr != null ? Integer
+ .parseInt(maxFieldLengthStr) : setupConfiguration.indexerMaxFieldLength);
if (!createIndex) {
- // Not asked to create the index - but check if this is necessary anyway:
+ // Not asked to create the index - but check if this is
+ // necessary anyway:
try {
IndexReader reader = openReader();
reader.close();
@@ -324,7 +335,7 @@
}
} else if (processing == STATE_QUERY) {
// processing a lucene:index - expecting a lucene:document
- if (LUCENE_URI.equals(namespaceURI) && LUCENE_DOCUMENT_ELEMENT.equals(localName)){
+ if (LUCENE_URI.equals(namespaceURI) && LUCENE_DOCUMENT_ELEMENT.equals(localName)) {
this.bodyDocumentURL = atts.getValue(LUCENE_DOCUMENT_URL_ATTRIBUTE);
if (this.bodyDocumentURL == null) {
throw new SAXException("<lucene:document> must have @url attribute");
@@ -332,7 +343,8 @@
// Remember the time the document indexing began
this.documentStartTime = System.currentTimeMillis();
- // remember these attributes so they can be passed on to the next stage in the pipeline,
+ // remember these attributes so they can be passed on to the
+ // next stage in the pipeline,
// when this document element is ended.
this.documentAttributes = new AttributesImpl(atts);
this.bodyText = new StringBuffer();
@@ -347,8 +359,7 @@
}
}
- public void endElement(String namespaceURI, String localName, String qName)
- throws SAXException {
+ public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
if (processing == STATE_QUERY) {
if (LUCENE_URI.equals(namespaceURI) && LUCENE_QUERY_ELEMENT.equals(localName)) {
@@ -377,7 +388,8 @@
this.bodyDocument.add(Field.UnIndexed(LuceneXMLIndexer.URL_FIELD, this.bodyDocumentURL));
// store: false, index: true, tokenize: false
- this.bodyDocument.add(new Field(LuceneXMLIndexer.UID_FIELD, uid(this.bodyDocumentURL), false, true, false));
+ this.bodyDocument.add(new Field(LuceneXMLIndexer.UID_FIELD, uid(this.bodyDocumentURL), false, true,
+ false));
try {
reindexDocument();
} catch (IOException e) {
@@ -385,20 +397,16 @@
}
this.bodyDocumentURL = null;
- // propagate the lucene:document element to the next stage in the pipeline
+ // propagate the lucene:document element to the next stage in
+ // the pipeline
long elapsedTime = System.currentTimeMillis() - this.documentStartTime;
- //documentAttributes = new AttributesImpl();
- this.documentAttributes.addAttribute(
- "",
- LUCENE_ELAPSED_TIME_ATTRIBUTE,
- LUCENE_ELAPSED_TIME_ATTRIBUTE,
- CDATA,
- String.valueOf(elapsedTime)
- );
+ // documentAttributes = new AttributesImpl();
+ this.documentAttributes.addAttribute("", LUCENE_ELAPSED_TIME_ATTRIBUTE, LUCENE_ELAPSED_TIME_ATTRIBUTE,
+ CDATA, String.valueOf(elapsedTime));
super.startElement(namespaceURI, localName, qName, this.documentAttributes);
super.endElement(namespaceURI, localName, qName);
this.processing = STATE_QUERY;
- } else {
+ } else {
// End element processing
IndexHelperField tos = (IndexHelperField) elementStack.pop();
StringBuffer text = tos.getText();
@@ -437,8 +445,7 @@
}
}
- public void characters(char[] ch, int start, int length)
- throws SAXException {
+ public void characters(char[] ch, int start, int length) throws SAXException {
if (processing == STATE_DOCUMENT && ch.length > 0 && start >= 0 && length > 1 && elementStack.size() > 0) {
String text = new String(ch, start, length);
@@ -450,8 +457,11 @@
}
}
- private void openWriter() throws IOException {
- File indexDirectory = new File(queryConfiguration.indexDirectory);
+ private void openWriter() throws IOException {
+ final Settings settings = (Settings) WebAppContextUtils.getCurrentWebApplicationContext().getBean(
+ "org.apache.cocoon.configuration.Settings");
+ final File workDir = new File(settings.getWorkDirectory());
+ File indexDirectory = new File(queryConfiguration.indexDirectory);
if (!indexDirectory.isAbsolute()) {
indexDirectory = new File(workDir, queryConfiguration.indexDirectory);
}
@@ -466,11 +476,14 @@
Directory directory = LuceneCocoonHelper.getDirectory(indexDirectory, createIndex);
Analyzer analyzer = LuceneCocoonHelper.getAnalyzer(queryConfiguration.analyzerClassname);
this.writer = new IndexWriter(directory, analyzer, createIndex);
- this.writer.mergeFactor = queryConfiguration.mergeFactor;
- this.writer.maxFieldLength = queryConfiguration.maxFieldLength;
- }
+ this.writer.mergeFactor = queryConfiguration.indexerMergeFactor;
+ this.writer.maxFieldLength = queryConfiguration.indexerMaxFieldLength;
+ }
private IndexReader openReader() throws IOException {
+ final Settings settings = (Settings) WebAppContextUtils.getCurrentWebApplicationContext().getBean(
+ "org.apache.cocoon.configuration.Settings");
+ final File workDir = new File(settings.getWorkDirectory());
File indexDirectory = new File(queryConfiguration.indexDirectory);
if (!indexDirectory.isAbsolute()) {
indexDirectory = new File(workDir, queryConfiguration.indexDirectory);
@@ -478,23 +491,27 @@
Directory directory = LuceneCocoonHelper.getDirectory(indexDirectory, createIndex);
IndexReader reader = IndexReader.open(directory);
return reader;
- }
+ }
private void reindexDocument() throws IOException {
if (this.createIndex) {
- // The index is being created, so there's no need to delete the doc from an existing index.
- // This means we can keep a single IndexWriter open throughout the process.
+ // The index is being created, so there's no need to delete the doc
+ // from an existing index.
+ // This means we can keep a single IndexWriter open throughout the
+ // process.
if (this.writer == null) {
openWriter();
}
this.writer.addDocument(this.bodyDocument);
} else {
- // This is an incremental reindex, so the document should be removed from the index before adding it
+ // This is an incremental reindex, so the document should be removed
+ // from the index before adding it
try {
IndexReader reader = openReader();
reader.delete(new Term(LuceneXMLIndexer.UID_FIELD, uid(this.bodyDocumentURL)));
reader.close();
- } catch (IOException e) { /* ignore */ }
+ } catch (IOException e) { /* ignore */
+ }
openWriter();
this.writer.addDocument(this.bodyDocument);
this.writer.close();
@@ -534,18 +551,75 @@
static class IndexerConfiguration {
String analyzerClassname;
String indexDirectory;
- int mergeFactor;
- int maxFieldLength;
+ int indexerMergeFactor;
+ int indexerMaxFieldLength;
- public IndexerConfiguration(String analyzerClassname,
- String indexDirectory,
- int mergeFactor,
- int maxFieldLength)
- {
+ public IndexerConfiguration(String analyzerClassname, String indexDirectory, int indexerMergeFactor,
+ int indexerMaxFieldLength) {
this.analyzerClassname = analyzerClassname;
this.indexDirectory = indexDirectory;
- this.mergeFactor = mergeFactor;
- this.maxFieldLength = maxFieldLength;
+ this.indexerMergeFactor = indexerMergeFactor;
+ this.indexerMaxFieldLength = indexerMaxFieldLength;
}
+ }
+
+ /**
+ * @return the analyzer
+ */
+ public String getAnalyzer() {
+ return analyzer;
+ }
+
+ /**
+ * @param analyzer
+ * the analyzer to set
+ */
+ public void setAnalyzer(String analyzer) {
+ this.analyzer = analyzer;
+ }
+
+ /**
+ * @return the directory
+ */
+ public String getDirectory() {
+ return directory;
+ }
+
+ /**
+ * @param directory
+ * the directory to set
+ */
+ public void setDirectory(String directory) {
+ this.directory = directory;
+ }
+
+ /**
+ * @return the mergeFactor
+ */
+ public int getMergeFactor() {
+ return mergeFactor;
+ }
+
+ /**
+ * @param mergeFactor
+ * the mergeFactor to set
+ */
+ public void setMergeFactor(int mergeFactor) {
+ this.mergeFactor = mergeFactor;
+ }
+
+ /**
+ * @return the maxFieldLength
+ */
+ public int getMaxFieldLength() {
+ return maxFieldLength;
+ }
+
+ /**
+ * @param maxFieldLength
+ * the maxFieldLength to set
+ */
+ public void setMaxFieldLength(int maxFieldLength) {
+ this.maxFieldLength = maxFieldLength;
}
}
Modified: cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/resources/META-INF/cocoon/spring/cocoon-lucene.xml
URL: http://svn.apache.org/viewvc/cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/resources/META-INF/cocoon/spring/cocoon-lucene.xml?view=diff&rev=554247&r1=554246&r2=554247
==============================================================================
--- cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/resources/META-INF/cocoon/spring/cocoon-lucene.xml (original)
+++ cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/resources/META-INF/cocoon/spring/cocoon-lucene.xml Sat Jul 7 13:01:50 2007
@@ -33,6 +33,23 @@
<property name="luceneCocoonSearcher" ref="org.apache.cocoon.components.search.LuceneCocoonSearcher" />
</bean>
+ <bean name="org.apache.cocoon.transformation.Transformer/luceneIndexer"
+ class="org.apache.cocoon.transformation.LuceneIndexTransformer"
+ scope="prototype">
+ <!-- Class name of the Lucene text analyzer to use. Typically depends on the language of the
+ text being indexed. See the Lucene documentation for more information. -->
+ <property name="analyzer" value="org.apache.lucene.analysis.standard.StandardAnalyzer" />
+ <!-- Location of directory where index files are stored. This path is relative to the Cocoon
+ work directory -->
+ <property name="directory" value="index" />
+ <!-- Determines how often segment indices are merged. See the Lucene documentation for more information. -->
+ <property name="mergeFactor" value="20" />
+ <!-- Maximum number of terms to index in a field (as far as the index is concerned, the document
+ will effectively be truncated at this point). The default value, 10k, may not be sufficient for
+ large documents. -->
+ <property name="maxFieldLength" value="10000" />
+ </bean>
+
<bean name="org.apache.cocoon.components.search.LuceneCocoonIndexer"
class="org.apache.cocoon.components.search.SimpleLuceneCocoonIndexerImpl">
<!-- XML Indexer implementation. -->