You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@cocoon.apache.org by fe...@apache.org on 2007/07/07 22:01:52 UTC

svn commit: r554247 - in /cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main: java/org/apache/cocoon/transformation/LuceneIndexTransformer.java resources/META-INF/cocoon/spring/cocoon-lucene.xml

Author: felixk
Date: Sat Jul  7 13:01:50 2007
New Revision: 554247

URL: http://svn.apache.org/viewvc?view=rev&rev=554247
Log:
Springify missed lucene index transformer also.

Modified:
    cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/java/org/apache/cocoon/transformation/LuceneIndexTransformer.java
    cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/resources/META-INF/cocoon/spring/cocoon-lucene.xml

Modified: cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/java/org/apache/cocoon/transformation/LuceneIndexTransformer.java
URL: http://svn.apache.org/viewvc/cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/java/org/apache/cocoon/transformation/LuceneIndexTransformer.java?view=diff&rev=554247&r1=554246&r2=554247
==============================================================================
--- cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/java/org/apache/cocoon/transformation/LuceneIndexTransformer.java (original)
+++ cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/java/org/apache/cocoon/transformation/LuceneIndexTransformer.java Sat Jul  7 13:01:50 2007
@@ -22,31 +22,25 @@
 import java.util.Map;
 import java.util.Stack;
 
-import org.apache.avalon.framework.configuration.Configurable;
-import org.apache.avalon.framework.configuration.Configuration;
-import org.apache.avalon.framework.configuration.ConfigurationException;
 import org.apache.avalon.framework.parameters.Parameters;
-import org.apache.avalon.framework.service.ServiceException;
-import org.apache.avalon.framework.service.ServiceManager;
-import org.apache.avalon.framework.service.Serviceable;
-
 import org.apache.cocoon.ProcessingException;
 import org.apache.cocoon.caching.CacheableProcessingComponent;
 import org.apache.cocoon.components.search.LuceneCocoonHelper;
 import org.apache.cocoon.components.search.LuceneXMLIndexer;
 import org.apache.cocoon.configuration.Settings;
 import org.apache.cocoon.environment.SourceResolver;
+import org.apache.cocoon.spring.configurator.WebAppContextUtils;
 import org.apache.commons.lang.BooleanUtils;
 import org.apache.excalibur.source.SourceValidity;
 import org.apache.excalibur.source.impl.validity.NOPValidity;
-
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
-import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.store.Directory;
+import org.springframework.beans.factory.InitializingBean;
 import org.xml.sax.Attributes;
 import org.xml.sax.SAXException;
 import org.xml.sax.helpers.AttributesImpl;
@@ -109,8 +103,8 @@
  *
  * @version $Id$
  */
-public class LuceneIndexTransformer extends AbstractTransformer
-    implements CacheableProcessingComponent, Configurable, Serviceable {
+public class LuceneIndexTransformer extends AbstractTransformer implements CacheableProcessingComponent,
+        InitializingBean {
 
     public static final String ANALYZER_CLASSNAME_CONFIG = "analyzer-classname";
     public static final String ANALYZER_CLASSNAME_PARAMETER = "analyzer-classname";
@@ -141,15 +135,16 @@
 
     // The 3 states of the state machine
     private static final int STATE_GROUND = 0; // initial or "ground" state
-    private static final int STATE_QUERY = 1; // processing a lucene:index (Query) element
-    private static final int STATE_DOCUMENT = 2; // processing a lucene:document element
+    private static final int STATE_QUERY = 1; // processing a lucene:index
+    // (Query) element
+    private static final int STATE_DOCUMENT = 2; // processing a
+    // lucene:document element
 
-    // Initialization time variables
-    protected File workDir = null;
-
-    // Declaration time parameters values (specified in sitemap component config)
+    // Declaration time parameters values (specified in sitemap component
+    // config)
     private IndexerConfiguration configureConfiguration;
-    // Invocation time parameters values (specified in sitemap transform parameters)
+    // Invocation time parameters values (specified in sitemap transform
+    // parameters)
     private IndexerConfiguration setupConfiguration;
     // Parameters specified in the input document
     private IndexerConfiguration queryConfiguration;
@@ -163,15 +158,42 @@
     private String bodyDocumentURL;
     private Stack elementStack = new Stack();
     /**
-     * Storage for the document element's attributes until the document
-     * has been indexed, so that they can be copied to the output
-     * along with a boolean <code>indexed</code> attribute.
+     * Storage for the document element's attributes until the document has been
+     * indexed, so that they can be copied to the output along with a boolean
+     * <code>indexed</code> attribute.
      */
-    private AttributesImpl documentAttributes; 
+    private AttributesImpl documentAttributes;
     private long documentStartTime;
 
+    /**
+     * Class name of the Lucene text analyzer to use. Typically depends on the
+     * language of the text being indexed. See the Lucene documentation for more
+     * information.
+     */
+    private String analyzer = ANALYZER_CLASSNAME_DEFAULT;
+
+    /**
+     * Location of directory where index files are stored. This path is relative
+     * to the Cocoon work directory
+     */
+    private String directory = DIRECTORY_DEFAULT;
+
+    /**
+     * Determines how often segment indices are merged. See the Lucene
+     * documentation for more information.
+     */
+    private int mergeFactor = MERGE_FACTOR_DEFAULT;
+
+    /**
+     * Maximum number of terms to index in a field (as far as the index is
+     * concerned, the document will effectively be truncated at this point. The
+     * default value, 10k, may not be sufficient for large documents.
+     */
+    private int maxFieldLength = MAX_FIELD_LENGTH_DEFAULT;
+
     private static String uid(String url) {
-        return url.replace('/', '\u0000'); // + "\u0000" + DateField.timeToString(urlConnection.getLastModified());
+        return url.replace('/', '\u0000'); // + "\u0000" +
+        // DateField.timeToString(urlConnection.getLastModified());
     }
 
     /**
@@ -180,43 +202,27 @@
      * parameters in the sitemap pipeline, or by attributes of the query
      * element(s) in the XML input document.
      */
-    public void configure(Configuration conf) throws ConfigurationException {
-        this.configureConfiguration = new IndexerConfiguration(
-            conf.getChild(ANALYZER_CLASSNAME_CONFIG).getValue(ANALYZER_CLASSNAME_DEFAULT), 
-            conf.getChild(DIRECTORY_CONFIG).getValue(DIRECTORY_DEFAULT), 
-            conf.getChild(MERGE_FACTOR_CONFIG).getValueAsInteger(MERGE_FACTOR_DEFAULT),
-            conf.getChild(MAX_FIELD_LENGTH_CONFIG).getValueAsInteger(MAX_FIELD_LENGTH_DEFAULT)
-        );
+    public void afterPropertiesSet() throws IllegalArgumentException {
+        this.configureConfiguration = new IndexerConfiguration(getAnalyzer(), getDirectory(), getMergeFactor(),
+                getMaxFieldLength());
     }
 
     /**
-     * Setup the transformer.
-     * Called when the pipeline is assembled.
-     * The parameters are those specified as child elements of the
-     * <code>&lt;map:transform&gt;</code> element in the sitemap.
-     * These parameters are optional: 
-     * If no parameters are specified here then the defaults are 
-     * supplied by the component configuration.
-     * Any parameters specified here may be over-ridden by attributes
-     * of the lucene:index element in the input document.
+     * Setup the transformer. Called when the pipeline is assembled. The
+     * parameters are those specified as child elements of the
+     * <code>&lt;map:transform&gt;</code> element in the sitemap. These
+     * parameters are optional: If no parameters are specified here then the
+     * defaults are supplied by the component configuration. Any parameters
+     * specified here may be over-ridden by attributes of the lucene:index
+     * element in the input document.
      */
     public void setup(SourceResolver resolver, Map objectModel, String src, Parameters parameters)
-    throws ProcessingException, SAXException, IOException {
-        setupConfiguration = new IndexerConfiguration(
-            parameters.getParameter(ANALYZER_CLASSNAME_PARAMETER, configureConfiguration.analyzerClassname),
-            parameters.getParameter(DIRECTORY_PARAMETER, configureConfiguration.indexDirectory),
-            parameters.getParameterAsInteger(MERGE_FACTOR_PARAMETER, configureConfiguration.mergeFactor),
-            parameters.getParameterAsInteger(MAX_FIELD_LENGTH_PARAMETER, configureConfiguration.maxFieldLength)
-        );
-    }
-
-    /**
-     * @see org.apache.avalon.framework.service.Serviceable#service(org.apache.avalon.framework.service.ServiceManager)
-     */
-    public void service(ServiceManager manager) throws ServiceException {
-        final Settings settings = (Settings)manager.lookup(Settings.ROLE);
-        this.workDir = new File(settings.getWorkDirectory());
-        manager.release(settings);
+            throws ProcessingException, SAXException, IOException {
+        setupConfiguration = new IndexerConfiguration(parameters.getParameter(ANALYZER_CLASSNAME_PARAMETER,
+                configureConfiguration.analyzerClassname), parameters.getParameter(DIRECTORY_PARAMETER,
+                configureConfiguration.indexDirectory), parameters.getParameterAsInteger(MERGE_FACTOR_PARAMETER,
+                configureConfiguration.indexerMergeFactor), parameters.getParameterAsInteger(
+                MAX_FIELD_LENGTH_PARAMETER, configureConfiguration.indexerMaxFieldLength));
     }
 
     /**
@@ -225,7 +231,10 @@
     public void recycle() {
         this.processing = STATE_GROUND;
         if (this.writer != null) {
-            try { this.writer.close(); } catch (IOException ioe) { }
+            try {
+                this.writer.close();
+            } catch (IOException ioe) {
+            }
             this.writer = null;
         }
         this.bodyText = null;
@@ -236,9 +245,9 @@
     }
 
     /**
-     * Generate the unique key.
-     * This key must be unique inside the space of this component.
-     *
+     * Generate the unique key. This key must be unique inside the space of this
+     * component.
+     * 
      * @return The generated key
      */
     public Serializable getKey() {
@@ -247,7 +256,7 @@
 
     /**
      * Generate the validity object.
-     *
+     * 
      * @return The generated validity object or <code>null</code> if the
      *         component is currently not cacheable.
      */
@@ -265,20 +274,23 @@
 
     /**
      * Begin the scope of a prefix-URI Namespace mapping.
-     *
-     * @param prefix The Namespace prefix being declared.
-     * @param uri The Namespace URI the prefix is mapped to.
+     * 
+     * @param prefix
+     *            The Namespace prefix being declared.
+     * @param uri
+     *            The Namespace URI the prefix is mapped to.
      */
     public void startPrefixMapping(String prefix, String uri) throws SAXException {
         if (processing == STATE_GROUND) {
-            super.startPrefixMapping(prefix,uri);
+            super.startPrefixMapping(prefix, uri);
         }
     }
 
     /**
      * End the scope of a prefix-URI mapping.
-     *
-     * @param prefix The prefix that was being mapping.
+     * 
+     * @param prefix
+     *            The prefix that was being mapping.
      */
     public void endPrefixMapping(String prefix) throws SAXException {
         if (processing == STATE_GROUND) {
@@ -286,28 +298,27 @@
         }
     }
 
-    public void startElement(String namespaceURI, String localName, String qName, Attributes atts)
-        throws SAXException {
+    public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException {
 
         if (processing == STATE_GROUND) {
-            if (LUCENE_URI.equals(namespaceURI) && LUCENE_QUERY_ELEMENT.equals(localName)){
+            if (LUCENE_URI.equals(namespaceURI) && LUCENE_QUERY_ELEMENT.equals(localName)) {
                 String sCreate = atts.getValue(LUCENE_QUERY_CREATE_ATTRIBUTE);
                 createIndex = BooleanUtils.toBoolean(sCreate);
 
                 String analyzerClassname = atts.getValue(LUCENE_QUERY_ANALYZER_ATTRIBUTE);
-                String indexDirectory  = atts.getValue(LUCENE_QUERY_DIRECTORY_ATTRIBUTE);
-                String mergeFactor = atts.getValue(LUCENE_QUERY_MERGE_FACTOR_ATTRIBUTE);
-                String maxFieldLength = atts.getValue(LUCENE_QUERY_MAX_FIELD_LENGTH_ATTRIBUTE);
-
-                queryConfiguration = new IndexerConfiguration(
-                    analyzerClassname != null ? analyzerClassname : setupConfiguration.analyzerClassname,
-                    indexDirectory != null ? indexDirectory : setupConfiguration.indexDirectory,
-                    mergeFactor != null ? Integer.parseInt(mergeFactor) : setupConfiguration.mergeFactor,
-                    maxFieldLength != null ? Integer.parseInt(maxFieldLength) : setupConfiguration.maxFieldLength
-                );
+                String indexDirectory = atts.getValue(LUCENE_QUERY_DIRECTORY_ATTRIBUTE);
+                String mergeFactorStr = atts.getValue(LUCENE_QUERY_MERGE_FACTOR_ATTRIBUTE);
+                String maxFieldLengthStr = atts.getValue(LUCENE_QUERY_MAX_FIELD_LENGTH_ATTRIBUTE);
+
+                queryConfiguration = new IndexerConfiguration(analyzerClassname != null ? analyzerClassname
+                        : setupConfiguration.analyzerClassname, indexDirectory != null ? indexDirectory
+                        : setupConfiguration.indexDirectory, mergeFactorStr != null ? Integer.parseInt(mergeFactorStr)
+                        : setupConfiguration.indexerMergeFactor, maxFieldLengthStr != null ? Integer
+                        .parseInt(maxFieldLengthStr) : setupConfiguration.indexerMaxFieldLength);
 
                 if (!createIndex) {
-                    // Not asked to create the index - but check if this is necessary anyway:
+                    // Not asked to create the index - but check if this is
+                    // necessary anyway:
                     try {
                         IndexReader reader = openReader();
                         reader.close();
@@ -324,7 +335,7 @@
             }
         } else if (processing == STATE_QUERY) {
             // processing a lucene:index - expecting a lucene:document
-            if (LUCENE_URI.equals(namespaceURI) && LUCENE_DOCUMENT_ELEMENT.equals(localName)){
+            if (LUCENE_URI.equals(namespaceURI) && LUCENE_DOCUMENT_ELEMENT.equals(localName)) {
                 this.bodyDocumentURL = atts.getValue(LUCENE_DOCUMENT_URL_ATTRIBUTE);
                 if (this.bodyDocumentURL == null) {
                     throw new SAXException("<lucene:document> must have @url attribute");
@@ -332,7 +343,8 @@
 
                 // Remember the time the document indexing began
                 this.documentStartTime = System.currentTimeMillis();
-                // remember these attributes so they can be passed on to the next stage in the pipeline,
+                // remember these attributes so they can be passed on to the
+                // next stage in the pipeline,
                 // when this document element is ended.
                 this.documentAttributes = new AttributesImpl(atts);
                 this.bodyText = new StringBuffer();
@@ -347,8 +359,7 @@
         }
     }
 
-    public void endElement(String namespaceURI, String localName, String qName)
-        throws SAXException {
+    public void endElement(String namespaceURI, String localName, String qName) throws SAXException {
 
         if (processing == STATE_QUERY) {
             if (LUCENE_URI.equals(namespaceURI) && LUCENE_QUERY_ELEMENT.equals(localName)) {
@@ -377,7 +388,8 @@
 
                 this.bodyDocument.add(Field.UnIndexed(LuceneXMLIndexer.URL_FIELD, this.bodyDocumentURL));
                 // store: false, index: true, tokenize: false
-                this.bodyDocument.add(new Field(LuceneXMLIndexer.UID_FIELD, uid(this.bodyDocumentURL), false, true, false));
+                this.bodyDocument.add(new Field(LuceneXMLIndexer.UID_FIELD, uid(this.bodyDocumentURL), false, true,
+                        false));
                 try {
                     reindexDocument();
                 } catch (IOException e) {
@@ -385,20 +397,16 @@
                 }
                 this.bodyDocumentURL = null;
 
-                // propagate the lucene:document element to the next stage in the pipeline
+                // propagate the lucene:document element to the next stage in
+                // the pipeline
                 long elapsedTime = System.currentTimeMillis() - this.documentStartTime;
-                //documentAttributes = new AttributesImpl();
-                this.documentAttributes.addAttribute(
-                    "", 
-                    LUCENE_ELAPSED_TIME_ATTRIBUTE, 
-                    LUCENE_ELAPSED_TIME_ATTRIBUTE, 
-                    CDATA, 
-                    String.valueOf(elapsedTime)
-                );
+                // documentAttributes = new AttributesImpl();
+                this.documentAttributes.addAttribute("", LUCENE_ELAPSED_TIME_ATTRIBUTE, LUCENE_ELAPSED_TIME_ATTRIBUTE,
+                        CDATA, String.valueOf(elapsedTime));
                 super.startElement(namespaceURI, localName, qName, this.documentAttributes);
                 super.endElement(namespaceURI, localName, qName);
                 this.processing = STATE_QUERY;
-            } else {                
+            } else {
                 // End element processing
                 IndexHelperField tos = (IndexHelperField) elementStack.pop();
                 StringBuffer text = tos.getText();
@@ -437,8 +445,7 @@
         }
     }
 
-    public void characters(char[] ch, int start, int length)
-        throws SAXException {
+    public void characters(char[] ch, int start, int length) throws SAXException {
 
         if (processing == STATE_DOCUMENT && ch.length > 0 && start >= 0 && length > 1 && elementStack.size() > 0) {
             String text = new String(ch, start, length);
@@ -450,8 +457,11 @@
         }
     }
 
-    private void openWriter() throws IOException {		
-    		File indexDirectory = new File(queryConfiguration.indexDirectory);
+    private void openWriter() throws IOException {
+        final Settings settings = (Settings) WebAppContextUtils.getCurrentWebApplicationContext().getBean(
+                "org.apache.cocoon.configuration.Settings");
+        final File workDir = new File(settings.getWorkDirectory());
+        File indexDirectory = new File(queryConfiguration.indexDirectory);
         if (!indexDirectory.isAbsolute()) {
             indexDirectory = new File(workDir, queryConfiguration.indexDirectory);
         }
@@ -466,11 +476,14 @@
         Directory directory = LuceneCocoonHelper.getDirectory(indexDirectory, createIndex);
         Analyzer analyzer = LuceneCocoonHelper.getAnalyzer(queryConfiguration.analyzerClassname);
         this.writer = new IndexWriter(directory, analyzer, createIndex);
-        this.writer.mergeFactor = queryConfiguration.mergeFactor;
-        this.writer.maxFieldLength = queryConfiguration.maxFieldLength;
-    }    
+        this.writer.mergeFactor = queryConfiguration.indexerMergeFactor;
+        this.writer.maxFieldLength = queryConfiguration.indexerMaxFieldLength;
+    }
 
     private IndexReader openReader() throws IOException {
+        final Settings settings = (Settings) WebAppContextUtils.getCurrentWebApplicationContext().getBean(
+                "org.apache.cocoon.configuration.Settings");
+        final File workDir = new File(settings.getWorkDirectory());
         File indexDirectory = new File(queryConfiguration.indexDirectory);
         if (!indexDirectory.isAbsolute()) {
             indexDirectory = new File(workDir, queryConfiguration.indexDirectory);
@@ -478,23 +491,27 @@
         Directory directory = LuceneCocoonHelper.getDirectory(indexDirectory, createIndex);
         IndexReader reader = IndexReader.open(directory);
         return reader;
-    }    
+    }
 
     private void reindexDocument() throws IOException {
         if (this.createIndex) {
-            // The index is being created, so there's no need to delete the doc from an existing index.
-            // This means we can keep a single IndexWriter open throughout the process.
+            // The index is being created, so there's no need to delete the doc
+            // from an existing index.
+            // This means we can keep a single IndexWriter open throughout the
+            // process.
             if (this.writer == null) {
                 openWriter();
             }
             this.writer.addDocument(this.bodyDocument);
         } else {
-            // This is an incremental reindex, so the document should be removed from the index before adding it
+            // This is an incremental reindex, so the document should be removed
+            // from the index before adding it
             try {
                 IndexReader reader = openReader();
                 reader.delete(new Term(LuceneXMLIndexer.UID_FIELD, uid(this.bodyDocumentURL)));
                 reader.close();
-            } catch (IOException e) { /* ignore */ }
+            } catch (IOException e) { /* ignore */
+            }
             openWriter();
             this.writer.addDocument(this.bodyDocument);
             this.writer.close();
@@ -534,18 +551,75 @@
     static class IndexerConfiguration {
         String analyzerClassname;
         String indexDirectory;
-        int mergeFactor;
-        int maxFieldLength;
+        int indexerMergeFactor;
+        int indexerMaxFieldLength;
 
-        public IndexerConfiguration(String analyzerClassname,
-                                    String indexDirectory,
-                                    int mergeFactor,
-                                    int maxFieldLength) 
-        {
+        public IndexerConfiguration(String analyzerClassname, String indexDirectory, int indexerMergeFactor,
+                int indexerMaxFieldLength) {
             this.analyzerClassname = analyzerClassname;
             this.indexDirectory = indexDirectory;
-            this.mergeFactor = mergeFactor;
-            this.maxFieldLength = maxFieldLength;
+            this.indexerMergeFactor = indexerMergeFactor;
+            this.indexerMaxFieldLength = indexerMaxFieldLength;
         }
+    }
+
+    /**
+     * @return the analyzer
+     */
+    public String getAnalyzer() {
+        return analyzer;
+    }
+
+    /**
+     * @param analyzer
+     *            the analyzer to set
+     */
+    public void setAnalyzer(String analyzer) {
+        this.analyzer = analyzer;
+    }
+
+    /**
+     * @return the directory
+     */
+    public String getDirectory() {
+        return directory;
+    }
+
+    /**
+     * @param directory
+     *            the directory to set
+     */
+    public void setDirectory(String directory) {
+        this.directory = directory;
+    }
+
+    /**
+     * @return the mergeFactor
+     */
+    public int getMergeFactor() {
+        return mergeFactor;
+    }
+
+    /**
+     * @param mergeFactor
+     *            the mergeFactor to set
+     */
+    public void setMergeFactor(int mergeFactor) {
+        this.mergeFactor = mergeFactor;
+    }
+
+    /**
+     * @return the maxFieldLength
+     */
+    public int getMaxFieldLength() {
+        return maxFieldLength;
+    }
+
+    /**
+     * @param maxFieldLength
+     *            the maxFieldLength to set
+     */
+    public void setMaxFieldLength(int maxFieldLength) {
+        this.maxFieldLength = maxFieldLength;
     }
 }

Modified: cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/resources/META-INF/cocoon/spring/cocoon-lucene.xml
URL: http://svn.apache.org/viewvc/cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/resources/META-INF/cocoon/spring/cocoon-lucene.xml?view=diff&rev=554247&r1=554246&r2=554247
==============================================================================
--- cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/resources/META-INF/cocoon/spring/cocoon-lucene.xml (original)
+++ cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/resources/META-INF/cocoon/spring/cocoon-lucene.xml Sat Jul  7 13:01:50 2007
@@ -33,6 +33,23 @@
     <property name="luceneCocoonSearcher" ref="org.apache.cocoon.components.search.LuceneCocoonSearcher" />
   </bean>
 
+  <bean name="org.apache.cocoon.transformation.Transformer/luceneIndexer"
+      class="org.apache.cocoon.transformation.LuceneIndexTransformer"
+      scope="prototype">
+    <!-- Class name of the Lucene text analyzer to use. Typically depends on the language of the 
+         text being indexed. See the Lucene documentation for more information. -->
+    <property name="analyzer" value="org.apache.lucene.analysis.standard.StandardAnalyzer" />
+    <!-- Location of directory where index files are stored. This path is relative to the Cocoon 
+         work directory -->
+    <property name="directory" value="index" />
+    <!-- Determines how often segment indices are merged. See the Lucene documentation for more information. -->
+    <property name="mergeFactor" value="20" />
+    <!-- Maximum number of terms to index in a field (as far as the index is concerned, the document
+         will effectively be truncated at this point. The default value, 10k, may not be sufficient for
+         large documents. -->
+    <property name="maxFieldLength" value="10000" />
+  </bean>
+  
   <bean name="org.apache.cocoon.components.search.LuceneCocoonIndexer"
         class="org.apache.cocoon.components.search.SimpleLuceneCocoonIndexerImpl">
     <!-- XML Indexer implementation. -->