You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@cocoon.apache.org by fe...@apache.org on 2007/07/07 23:07:21 UTC
svn commit: r554265 - in
/cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main:
java/org/apache/cocoon/transformation/LuceneIndexTransformer.java
resources/META-INF/cocoon/spring/cocoon-lucene.xml
Author: felixk
Date: Sat Jul 7 14:07:20 2007
New Revision: 554265
URL: http://svn.apache.org/viewvc?view=rev&rev=554265
Log:
Apply patch from http://issues.apache.org/jira/browse/COCOON-2065.
Thanks to Dominique De Munck for providing the patch
Modified:
cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/java/org/apache/cocoon/transformation/LuceneIndexTransformer.java
cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/resources/META-INF/cocoon/spring/cocoon-lucene.xml
Modified: cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/java/org/apache/cocoon/transformation/LuceneIndexTransformer.java
URL: http://svn.apache.org/viewvc/cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/java/org/apache/cocoon/transformation/LuceneIndexTransformer.java?view=diff&rev=554265&r1=554264&r2=554265
==============================================================================
--- cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/java/org/apache/cocoon/transformation/LuceneIndexTransformer.java (original)
+++ cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/java/org/apache/cocoon/transformation/LuceneIndexTransformer.java Sat Jul 7 14:07:20 2007
@@ -75,7 +75,18 @@
* <dd><p>Class name of the Lucene text analyzer to use. Typically depends on the language of the text being indexed.
* See the Lucene documentation for more information.</p></dd>
* <dt style="font-weight: bold;">merge-factor</dt>
- * <dd>Determines how often segment indices are merged. See the Lucene documentation for more information.</dd>
+ * <dd><p>Determines how often segment indices are merged. See the Lucene documentation for more information.</p></dd>
+ * <dt style="font-weight: bold;">optimize-frequency</dt>
+ * <dd><p>Determines how often the Lucene index will be optimized. When you have thousands of documents, optimizing the index
+ * can become quite slow (e.g. 7 seconds for 9000 small docs, P4).</p>
+ *
+ * <ul>
+ * <li>1: always optimize (default)</li>
+ * <li>0: never optimize</li>
+ * <li>x: optimize on average once every x updates. Any number greater than 1 may be used; a random number generator decides on each update whether to optimize.</li>
+ * </ul>
+ *
+ * </dd>
* </dl>
* <dl>
* <dt style="font-weight: bold;">A simple example of the input:</dt>
@@ -98,11 +109,11 @@
* </lucene:document>
* </lucene:index>
* </pre>
- * </dd>
- * </dl>
- *
- * @version $Id$
- */
+ * </dd>
+ * </dl>
+ *
+ * @version $Id$
+ */
public class LuceneIndexTransformer extends AbstractTransformer implements CacheableProcessingComponent,
InitializingBean {
@@ -115,6 +126,11 @@
public static final String MERGE_FACTOR_CONFIG = "merge-factor";
public static final String MERGE_FACTOR_PARAMETER = "merge-factor";
public static final int MERGE_FACTOR_DEFAULT = 20;
+ public static final String OPTIMIZE_FREQUENCY_CONFIG = "optimize-frequency";
+ public static final String OPTIMIZE_FREQUENCY_PARAMETER = "optimize-frequency";
+ // by default, optimizing will take place on every update (previous
+ // behaviour)
+ public static final int OPTIMIZE_FREQUENCY_DEFAULT = 1;
public static final String MAX_FIELD_LENGTH_CONFIG = "max-field-length";
public static final String MAX_FIELD_LENGTH_PARAMETER = "max-field-length";
public static final int MAX_FIELD_LENGTH_DEFAULT = IndexWriter.DEFAULT_MAX_FIELD_LENGTH;
@@ -126,6 +142,7 @@
public static final String LUCENE_QUERY_CREATE_ATTRIBUTE = "create";
public static final String LUCENE_QUERY_MERGE_FACTOR_ATTRIBUTE = "merge-factor";
public static final String LUCENE_QUERY_MAX_FIELD_LENGTH_ATTRIBUTE = "max-field-length";
+ public static final String LUCENE_QUERY_OPTIMIZE_FREQUENCY_CONFIG_ATTRIBUTE = "optimize-frequency";
public static final String LUCENE_DOCUMENT_ELEMENT = "document";
public static final String LUCENE_DOCUMENT_URL_ATTRIBUTE = "url";
public static final String LUCENE_ELEMENT_ATTR_TO_TEXT_ATTRIBUTE = "text-attr";
@@ -191,6 +208,9 @@
*/
private int maxFieldLength = MAX_FIELD_LENGTH_DEFAULT;
+ /** Determines how often the lucene index will be optimized. */
+ private int optimizeFrequency = OPTIMIZE_FREQUENCY_DEFAULT;
+
private static String uid(String url) {
return url.replace('/', '\u0000'); // + "\u0000" +
// DateField.timeToString(urlConnection.getLastModified());
@@ -204,7 +224,7 @@
*/
public void afterPropertiesSet() throws IllegalArgumentException {
this.configureConfiguration = new IndexerConfiguration(getAnalyzer(), getDirectory(), getMergeFactor(),
- getMaxFieldLength());
+ getMaxFieldLength(), getOptimizeFrequency());
}
/**
@@ -222,7 +242,8 @@
configureConfiguration.analyzerClassname), parameters.getParameter(DIRECTORY_PARAMETER,
configureConfiguration.indexDirectory), parameters.getParameterAsInteger(MERGE_FACTOR_PARAMETER,
configureConfiguration.indexerMergeFactor), parameters.getParameterAsInteger(
- MAX_FIELD_LENGTH_PARAMETER, configureConfiguration.indexerMaxFieldLength));
+ MAX_FIELD_LENGTH_PARAMETER, configureConfiguration.indexerMaxFieldLength), parameters
+ .getParameterAsInteger(OPTIMIZE_FREQUENCY_PARAMETER, configureConfiguration.indexerOptimizeFrequency));
}
/**
@@ -309,12 +330,15 @@
String indexDirectory = atts.getValue(LUCENE_QUERY_DIRECTORY_ATTRIBUTE);
String mergeFactorStr = atts.getValue(LUCENE_QUERY_MERGE_FACTOR_ATTRIBUTE);
String maxFieldLengthStr = atts.getValue(LUCENE_QUERY_MAX_FIELD_LENGTH_ATTRIBUTE);
+ String optimizeFrequencyStr = atts.getValue(LUCENE_QUERY_OPTIMIZE_FREQUENCY_CONFIG_ATTRIBUTE);
queryConfiguration = new IndexerConfiguration(analyzerClassname != null ? analyzerClassname
: setupConfiguration.analyzerClassname, indexDirectory != null ? indexDirectory
: setupConfiguration.indexDirectory, mergeFactorStr != null ? Integer.parseInt(mergeFactorStr)
: setupConfiguration.indexerMergeFactor, maxFieldLengthStr != null ? Integer
- .parseInt(maxFieldLengthStr) : setupConfiguration.indexerMaxFieldLength);
+ .parseInt(maxFieldLengthStr) : setupConfiguration.indexerMaxFieldLength,
+ optimizeFrequencyStr != null ? Integer.parseInt(optimizeFrequencyStr)
+ : setupConfiguration.indexerOptimizeFrequency);
if (!createIndex) {
// Not asked to create the index - but check if this is
@@ -363,16 +387,18 @@
if (processing == STATE_QUERY) {
if (LUCENE_URI.equals(namespaceURI) && LUCENE_QUERY_ELEMENT.equals(localName)) {
- // End query processing
- try {
- if (this.writer == null) {
- openWriter();
+ if (needToOptimize()) {
+ // End query processing
+ try {
+ if (this.writer == null) {
+ openWriter();
+ }
+ this.writer.optimize();
+ this.writer.close();
+ this.writer = null;
+ } catch (IOException e) {
+ throw new SAXException(e);
}
- this.writer.optimize();
- this.writer.close();
- this.writer = null;
- } catch (IOException e) {
- throw new SAXException(e);
}
// propagate the query element to the next stage in the pipeline
super.endElement(namespaceURI, localName, qName);
@@ -553,13 +579,52 @@
String indexDirectory;
int indexerMergeFactor;
int indexerMaxFieldLength;
+ int indexerOptimizeFrequency;
public IndexerConfiguration(String analyzerClassname, String indexDirectory, int indexerMergeFactor,
- int indexerMaxFieldLength) {
+ int indexerMaxFieldLength, int indexerOptimizeFrequency) {
this.analyzerClassname = analyzerClassname;
this.indexDirectory = indexDirectory;
this.indexerMergeFactor = indexerMergeFactor;
this.indexerMaxFieldLength = indexerMaxFieldLength;
+ this.indexerOptimizeFrequency = indexerOptimizeFrequency;
+ }
+ }
+
+ /**
+ * Will check if, based on the configuration (optimize-frequency option),
+ * the lucene index should be optimized. It uses a random number generator
+ * to determine if it should optimize or not.
+ *
+ * This check was added because optimizing large indexes becomes quite
+ * slow.
+ *
+ * From the lucene documentation: The IndexWriter class supports an
+ * optimize() method that compacts the index database and speedup queries.
+ * You may want to use this method after performing a complete indexing of
+ * your document set or after incremental updates of the index. If your
+ * incremental update adds documents frequently, you want to perform the
+ * optimization only once in a while to avoid the extra overhead of the
+ * optimization.
+ *
+ * @return <code>true</code> if the index should be optimized,
+ * <code>false</code> otherwise
+ */
+ private boolean needToOptimize() {
+ int optimizeFrequency = this.queryConfiguration.indexerOptimizeFrequency;
+ if (optimizeFrequency == 0) {
+ return false;
+ }
+ if (optimizeFrequency == 1) {
+ return true;
+ }
+
+ // use a random int to determine if we may execute
+ int randomInt = 1 + (int) (Math.random() * optimizeFrequency);
+ if (randomInt == 1) {
+ return true;
+ } else {
+ return false;
}
}
@@ -621,5 +686,20 @@
*/
public void setMaxFieldLength(int maxFieldLength) {
this.maxFieldLength = maxFieldLength;
+ }
+
+ /**
+ * @return the optimizeFrequency
+ */
+ public int getOptimizeFrequency() {
+ return optimizeFrequency;
+ }
+
+ /**
+ * @param optimizeFrequency
+ * the optimizeFrequency to set
+ */
+ public void setOptimizeFrequency(int optimizeFrequency) {
+ this.optimizeFrequency = optimizeFrequency;
}
}
Modified: cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/resources/META-INF/cocoon/spring/cocoon-lucene.xml
URL: http://svn.apache.org/viewvc/cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/resources/META-INF/cocoon/spring/cocoon-lucene.xml?view=diff&rev=554265&r1=554264&r2=554265
==============================================================================
--- cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/resources/META-INF/cocoon/spring/cocoon-lucene.xml (original)
+++ cocoon/trunk/blocks/cocoon-lucene/cocoon-lucene-impl/src/main/resources/META-INF/cocoon/spring/cocoon-lucene.xml Sat Jul 7 14:07:20 2007
@@ -48,6 +48,8 @@
will effectively be truncated at this point. The default value, 10k, may not be sufficient for
large documents. -->
<property name="maxFieldLength" value="10000" />
+ <!-- Determines how often the lucene index will be optimized. -->
+ <property name="optimizeFrequency" value="1" />
</bean>
<bean name="org.apache.cocoon.components.search.LuceneCocoonIndexer"