You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lenya.apache.org by mi...@apache.org on 2005/11/16 16:14:18 UTC
svn commit: r345031 [1/2] - in /lenya/trunk/src/java/org/apache/cocoon: ./
components/ components/search/ components/search/analyzer/
components/search/components/ components/search/components/impl/
components/search/fieldmodel/ components/search/utils...
Author: michi
Date: Wed Nov 16 07:14:03 2005
New Revision: 345031
URL: http://svn.apache.org/viewcvs?rev=345031&view=rev
Log:
lucene specific classes re Robert Goenes code
Added:
lenya/trunk/src/java/org/apache/cocoon/
lenya/trunk/src/java/org/apache/cocoon/components/
lenya/trunk/src/java/org/apache/cocoon/components/search/
lenya/trunk/src/java/org/apache/cocoon/components/search/Index.java
lenya/trunk/src/java/org/apache/cocoon/components/search/IndexException.java
lenya/trunk/src/java/org/apache/cocoon/components/search/IndexStructure.java
lenya/trunk/src/java/org/apache/cocoon/components/search/analyzer/
lenya/trunk/src/java/org/apache/cocoon/components/search/analyzer/ConfigurableAnalyzer.java
lenya/trunk/src/java/org/apache/cocoon/components/search/analyzer/ConfigurablePerFieldAnalyzer.java
lenya/trunk/src/java/org/apache/cocoon/components/search/analyzer/ConfigurableStopwordAnalyzer.java
lenya/trunk/src/java/org/apache/cocoon/components/search/components/
lenya/trunk/src/java/org/apache/cocoon/components/search/components/AnalyzerManager.java
lenya/trunk/src/java/org/apache/cocoon/components/search/components/IndexManager.java
lenya/trunk/src/java/org/apache/cocoon/components/search/components/Indexer.java
lenya/trunk/src/java/org/apache/cocoon/components/search/components/Searcher.java
lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/
lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/AbstractIndexer.java
lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/AbstractSearcher.java
lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/AnalyzerManagerImpl.java
lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/DefaultIndexerImpl.java
lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/DefaultSearcherImpl.java
lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/IndexManagerImpl.java
lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/ParallelIndexerImpl.java
lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/ParallelSearcherImpl.java
lenya/trunk/src/java/org/apache/cocoon/components/search/fieldmodel/
lenya/trunk/src/java/org/apache/cocoon/components/search/fieldmodel/DateFieldDefinition.java
lenya/trunk/src/java/org/apache/cocoon/components/search/fieldmodel/FieldDefinition.java
lenya/trunk/src/java/org/apache/cocoon/components/search/fieldmodel/StringFieldDefinition.java
lenya/trunk/src/java/org/apache/cocoon/components/search/lucene2.roles
lenya/trunk/src/java/org/apache/cocoon/components/search/utils/
lenya/trunk/src/java/org/apache/cocoon/components/search/utils/SourceHelper.java
lenya/trunk/src/java/org/apache/cocoon/transformation/
lenya/trunk/src/java/org/apache/cocoon/transformation/LuceneIndexTransformer2.java
lenya/trunk/src/java/org/apache/cocoon/transformation/LuceneIndexTransformerOptimized.java
Added: lenya/trunk/src/java/org/apache/cocoon/components/search/Index.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/cocoon/components/search/Index.java?rev=345031&view=auto
==============================================================================
--- lenya/trunk/src/java/org/apache/cocoon/components/search/Index.java (added)
+++ lenya/trunk/src/java/org/apache/cocoon/components/search/Index.java Wed Nov 16 07:14:03 2005
@@ -0,0 +1,311 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cocoon.components.search;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Date;
+
+import org.apache.avalon.framework.configuration.ConfigurationException;
+import org.apache.avalon.framework.service.ServiceException;
+import org.apache.avalon.framework.service.ServiceManager;
+import org.apache.cocoon.components.search.components.AnalyzerManager;
+import org.apache.cocoon.components.search.components.Indexer;
+import org.apache.cocoon.components.search.fieldmodel.DateFieldDefinition;
+import org.apache.cocoon.components.search.fieldmodel.FieldDefinition;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+
+/**
+ * A single Lucene index: its ID, its field structure, the ID of its default
+ * analyzer and its Lucene {@link Directory}. It also hands out the
+ * {@link Indexer} for the index, at most one at a time.
+ *
+ * @author Nicolas Maisonneuve
+ */
+public class Index {
+
+    /** ID of the analyzer given to the indexer in {@link #getIndexer()}. */
+    private String defaultAnalyzer;
+
+    /** Index structure definition (the known field definitions). */
+    private IndexStructure structure;
+
+    /** Index ID. */
+    private String id;
+
+    /** Lucene directory of the index. */
+    private Directory directory;
+
+    /** Number of one-second periods to wait for a busy indexer. */
+    private int numtries = 5;
+
+    /** True while an indexer has been handed out and not yet released. */
+    private boolean indexer_busy;
+
+    /** Role name used to look up the indexer component. */
+    private String indexer_role;
+
+    /** Service manager used to look up and release components. */
+    private ServiceManager manager;
+
+    /**
+     * Create a Lucene document that contains only its uid field.
+     *
+     * @param uid
+     *            String the document uid
+     * @return Document a document holding the uid field
+     * @throws IllegalStateException
+     *             if the uid field cannot be created (previously this
+     *             failure was swallowed and a document without uid returned)
+     */
+    public Document createDocument(String uid) {
+        Document doc = new Document();
+        try {
+            doc.add(createField(Indexer.DOCUMENT_UID_FIELD, uid));
+        } catch (IndexException ex) {
+            // Indexer addresses documents by uid (see Indexer.del), so a
+            // document without it must not leave this method silently.
+            IllegalStateException failure = new IllegalStateException(
+                    "Cannot create the " + Indexer.DOCUMENT_UID_FIELD
+                            + " field: " + ex.getMessage());
+            failure.initCause(ex);
+            throw failure;
+        }
+        return doc;
+    }
+
+    /**
+     * Create a Lucene field.
+     *
+     * @param fieldname
+     *            String fieldname (must exist in the index structure)
+     * @param value
+     *            String value
+     * @return the field built by the matching field definition
+     * @throws IndexException
+     *             if no field definition with this name exists
+     */
+    public Field createField(String fieldname, String value)
+            throws IndexException {
+        FieldDefinition f = structure.getFieldDef(fieldname);
+        if (f == null) {
+            throw new IndexException("Field with the name: " + fieldname
+                    + " doesn't exist");
+        }
+        return f.createLField(value);
+    }
+
+    /**
+     * Create a Lucene field for a date value.
+     *
+     * @param fieldname
+     *            String fieldname (must exist in the index structure and be
+     *            a date field)
+     * @param value
+     *            Date value
+     * @return the field built by the matching date field definition
+     * @throws IndexException
+     *             if no field definition with this name exists, or if the
+     *             definition is not a {@link DateFieldDefinition}
+     */
+    public Field createField(String fieldname, Date value)
+            throws IndexException {
+        FieldDefinition f = structure.getFieldDef(fieldname);
+        if (f == null) {
+            throw new IndexException("Field with the name: " + fieldname
+                    + " doesn't exist");
+        }
+        // Report a wrong field type as IndexException instead of letting a
+        // ClassCastException escape.
+        if (!(f instanceof DateFieldDefinition)) {
+            throw new IndexException("Field with the name: " + fieldname
+                    + " is not a date field");
+        }
+        return ((DateFieldDefinition) f).createLField(value);
+    }
+
+    /**
+     * Get the indexer of the index. Only one indexer is handed out at a
+     * time; a caller waits up to <code>numtries</code> seconds for the
+     * current holder to call {@link #releaseIndexer(Indexer)}.
+     *
+     * @throws IndexException
+     *             on timeout, on interruption while waiting, or if the
+     *             indexer or analyzer cannot be looked up or configured
+     * @return Indexer configured with the default analyzer and the directory
+     */
+    public synchronized Indexer getIndexer() throws IndexException {
+
+        // wait() releases the monitor while blocked, so releaseIndexer()
+        // can actually run; sleeping inside this synchronized method would
+        // keep it locked out and make every wait end in a timeout.
+        long remaining = numtries * 1000L;
+        final long deadline = System.currentTimeMillis() + remaining;
+        while (indexer_busy && remaining > 0) {
+            try {
+                wait(remaining);
+            } catch (InterruptedException ex) {
+                // keep the interrupt visible to the caller
+                Thread.currentThread().interrupt();
+                throw new IndexException(
+                        "Interrupted while waiting for the indexer", ex);
+            }
+            remaining = deadline - System.currentTimeMillis();
+        }
+
+        if (indexer_busy) {
+            throw new IndexException(
+                    "Timeout to access to the indexer (the indexer is indexing)");
+        }
+
+        Indexer indexer = null;
+        AnalyzerManager analyzerM = null;
+        boolean handedOut = false;
+        try {
+            indexer = (Indexer) this.manager.lookup(indexer_role);
+
+            // update maybe the analyzer
+            analyzerM = (AnalyzerManager) this.manager
+                    .lookup(AnalyzerManager.ROLE);
+
+            indexer.setAnalyzer(analyzerM.getAnalyzer(getDefaultAnalyzerID()));
+            indexer.setIndex(directory);
+
+            // Mark busy only once the indexer is fully set up, so a failed
+            // lookup cannot leave the index blocked forever.
+            indexer_busy = true;
+            handedOut = true;
+            return indexer;
+        } catch (ServiceException ex1) {
+            throw new IndexException(ex1);
+        } catch (ConfigurationException ex2) {
+            throw new IndexException(ex2);
+        } finally {
+            if (analyzerM != null) {
+                manager.release(analyzerM);
+            }
+            // do not leak a half-configured indexer
+            if (!handedOut && indexer != null) {
+                manager.release(indexer);
+            }
+        }
+    }
+
+    /**
+     * Release the indexer and wake up callers waiting in
+     * {@link #getIndexer()}. A null argument is ignored.
+     *
+     * @param indexer
+     *            the indexer obtained from {@link #getIndexer()}
+     */
+    public synchronized void releaseIndexer(Indexer indexer) {
+        if (indexer != null) {
+            try {
+                this.manager.release(indexer);
+            } finally {
+                indexer_busy = false;
+                notifyAll();
+            }
+        }
+    }
+
+    /**
+     * Get the index ID.
+     *
+     * @return the index ID
+     */
+    public String getID() {
+        return id;
+    }
+
+    /**
+     * Set the index ID.
+     *
+     * @param id
+     *            index ID
+     */
+    public void setID(String id) {
+        this.id = id;
+    }
+
+    /**
+     * Get the default analyzer.
+     *
+     * @return the id of the default analyzer
+     */
+    public String getDefaultAnalyzerID() {
+        return defaultAnalyzer;
+    }
+
+    /**
+     * Set the default analyzer.
+     *
+     * @param defaultAnalyzerID
+     *            the id of the default analyzer
+     */
+    public void setDefaultAnalyzerID(String defaultAnalyzerID) {
+        this.defaultAnalyzer = defaultAnalyzerID;
+    }
+
+    /**
+     * Return the index structure.
+     *
+     * @return the index structure
+     */
+    public IndexStructure getStructure() {
+        return structure;
+    }
+
+    /**
+     * Set the index structure.
+     *
+     * @param structure
+     *            IndexStructure
+     */
+    public void setStructure(IndexStructure structure) {
+        this.structure = structure;
+    }
+
+    /**
+     * Set the service manager used to look up the indexer and the analyzer
+     * manager.
+     *
+     * @param manager
+     *            the service manager
+     */
+    public void setManager(ServiceManager manager) {
+        this.manager = manager;
+    }
+
+    /**
+     * Get the Lucene directory.
+     *
+     * @return the Lucene directory
+     */
+    public Directory getDirectory() {
+        return directory;
+    }
+
+    /**
+     * Set the Lucene directory. A locked index is unlocked, and an empty
+     * index is created if none exists in the directory yet.
+     *
+     * @param dir
+     *            Lucene directory
+     * @return true if the index was found locked and has been unlocked,
+     *         false otherwise
+     * @throws IOException
+     *             if Lucene cannot inspect, unlock or create the index
+     */
+    public boolean setDirectory(Directory dir) throws IOException {
+        boolean locked = false;
+        this.directory = dir;
+
+        // NOTE(review): this removes the lock unconditionally; presumably it
+        // is meant to clear stale locks left by a crash - confirm no other
+        // process can be writing to the same directory.
+        if (IndexReader.isLocked(directory)) {
+            IndexReader.unlock(directory);
+            locked = true;
+        }
+
+        // create index if the index doesn't exist
+        if (!IndexReader.indexExists(directory)) {
+            (new IndexWriter(directory, null, true)).close();
+        }
+
+        return locked;
+    }
+
+    /**
+     * Set the index directory from a file system path. The directory is
+     * created if the path does not exist yet.
+     *
+     * @param path
+     *            String path of the index directory
+     * @return true if the index was found locked and has been unlocked
+     * @throws IOException
+     *             if the directory cannot be opened or initialised
+     */
+    public boolean setDirectory(String path) throws IOException {
+        File fpath = new File(path);
+        Directory dir = FSDirectory.getDirectory(fpath, !fpath.exists());
+        return setDirectory(dir);
+    }
+
+    /**
+     * Set the role name under which the indexer is looked up.
+     *
+     * @param indexer
+     *            The indexer role to set.
+     */
+    public void setIndexer(String indexer) {
+        this.indexer_role = indexer;
+    }
+}
Added: lenya/trunk/src/java/org/apache/cocoon/components/search/IndexException.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/cocoon/components/search/IndexException.java?rev=345031&view=auto
==============================================================================
--- lenya/trunk/src/java/org/apache/cocoon/components/search/IndexException.java (added)
+++ lenya/trunk/src/java/org/apache/cocoon/components/search/IndexException.java Wed Nov 16 07:14:03 2005
@@ -0,0 +1,56 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ *
+ * You may obtain a copy of the License at
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES
+ * OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cocoon.components.search;
+
+/**
+ * Checked exception raised by index operations.
+ * {@link #getMessage()} always returns the stored text prefixed with
+ * <code>"message: "</code>.
+ *
+ * @author Nicolas Maisonneuve
+ */
+public class IndexException extends Exception {
+
+    /** Message text without the prefix added by {@link #getMessage()}. */
+    private String message;
+
+    /**
+     * Create an exception with a message and no cause.
+     *
+     * @param mes
+     *            message
+     */
+    public IndexException(String mes) {
+        this(mes, null);
+    }
+
+    /**
+     * Create an exception that wraps another one. The description of the
+     * wrapped exception is used as message, so the reason stays visible
+     * where only {@link #getMessage()} is logged (it used to be empty).
+     *
+     * @param ex
+     *            initial exception, may be null
+     */
+    public IndexException(Exception ex) {
+        this(ex == null ? "" : ex.toString(), ex);
+    }
+
+    /**
+     * Constructor
+     *
+     * @param mes
+     *            message
+     * @param ex
+     *            initial exception, may be null
+     */
+    public IndexException(String mes, Exception ex) {
+        message = mes;
+        // initCause is only called for a real cause, so a cause can still
+        // be attached later to an exception created without one
+        if (ex != null) {
+            initCause(ex);
+        }
+    }
+
+    /**
+     * @return the stored message prefixed with <code>"message: "</code>
+     */
+    public String getMessage() {
+        return "message: " + message;
+    }
+
+}
Added: lenya/trunk/src/java/org/apache/cocoon/components/search/IndexStructure.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/cocoon/components/search/IndexStructure.java?rev=345031&view=auto
==============================================================================
--- lenya/trunk/src/java/org/apache/cocoon/components/search/IndexStructure.java (added)
+++ lenya/trunk/src/java/org/apache/cocoon/components/search/IndexStructure.java Wed Nov 16 07:14:03 2005
@@ -0,0 +1,112 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cocoon.components.search;
+
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.cocoon.components.search.components.Indexer;
+import org.apache.cocoon.components.search.fieldmodel.FieldDefinition;
+
+/**
+ * Index definition class: holds all field definitions of an index, keyed by
+ * field name. Every structure contains the uid field from creation on.
+ *
+ * @author Nicolas Maisonneuve
+ *
+ */
+public final class IndexStructure {
+
+    /** Field definitions keyed by field name (String to FieldDefinition). */
+    private final Map fielddefs = new HashMap();
+
+    /**
+     * Create a structure that contains only the stored uid keyword field.
+     */
+    public IndexStructure() {
+        // An index always has a UID field
+        FieldDefinition fielddef = FieldDefinition.create(
+                Indexer.DOCUMENT_UID_FIELD, FieldDefinition.KEYWORD);
+        fielddef.setStore(true);
+        this.addFieldDef(fielddef);
+    }
+
+    /**
+     * Add a field definition to the index definition.
+     *
+     * @param fielddef
+     *            the definition to add
+     * @throws IllegalArgumentException
+     *             if a definition with the same name is already registered
+     */
+    public void addFieldDef(FieldDefinition fielddef) {
+        if (fielddefs.containsKey(fielddef.name())) {
+            throw new IllegalArgumentException(" field with the name "
+                    + fielddef.name() + " is already used");
+        }
+        fielddefs.put(fielddef.name(), fielddef);
+    }
+
+    /**
+     * @return all field names contained in the index (order unspecified)
+     */
+    public final String[] getFieldNames() {
+        Set results = fielddefs.keySet();
+        return (String[]) results.toArray(new String[results.size()]);
+    }
+
+    /**
+     * Return all field definitions.
+     *
+     * @return FieldDefinition[] (order unspecified)
+     */
+    public final FieldDefinition[] getFieldDef() {
+        Collection results = fielddefs.values();
+        return (FieldDefinition[]) results.toArray(new FieldDefinition[results
+                .size()]);
+    }
+
+    /**
+     * Return the field definition associated with the name.
+     *
+     * @param fieldname
+     *            String the name of the field definition
+     * @return FieldDefinition, or null if no field has this name
+     */
+    public final FieldDefinition getFieldDef(String fieldname) {
+        return (FieldDefinition) fielddefs.get(fieldname);
+    }
+
+    /**
+     * Check if this field exists.
+     *
+     * @param name
+     *            the field's name, may be null
+     * @return true if a field with this name exists
+     */
+    public final boolean hasField(String name) {
+        // The map compares keys with equals(), so interning the name is
+        // unnecessary; without it a null name simply yields false.
+        return fielddefs.containsKey(name);
+    }
+
+    /**
+     * @return a multi-line description: a header line followed by one line
+     *         per field definition
+     */
+    public String toString() {
+        StringBuffer result = new StringBuffer("DocumentFactory:");
+        Iterator iter = this.fielddefs.values().iterator();
+        while (iter.hasNext()) {
+            FieldDefinition item = (FieldDefinition) iter.next();
+            result.append("\n").append(item.toString());
+        }
+        return result.toString();
+    }
+
+}
Added: lenya/trunk/src/java/org/apache/cocoon/components/search/analyzer/ConfigurableAnalyzer.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/cocoon/components/search/analyzer/ConfigurableAnalyzer.java?rev=345031&view=auto
==============================================================================
--- lenya/trunk/src/java/org/apache/cocoon/components/search/analyzer/ConfigurableAnalyzer.java (added)
+++ lenya/trunk/src/java/org/apache/cocoon/components/search/analyzer/ConfigurableAnalyzer.java Wed Nov 16 07:14:03 2005
@@ -0,0 +1,154 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cocoon.components.search.analyzer;
+
+import java.io.Reader;
+
+import org.apache.avalon.framework.configuration.Configuration;
+import org.apache.avalon.framework.configuration.ConfigurationException;
+import org.apache.avalon.framework.logger.LogEnabled;
+import org.apache.avalon.framework.logger.Logger;
+import org.apache.cocoon.components.search.components.AnalyzerManager;
+import org.apache.cocoon.components.search.utils.SourceHelper;
+import org.apache.excalibur.source.Source;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+
+/**
+ * Wrapper that lets a Lucene analyzer be configured from an XML file.
+ * Subclasses build the wrapped analyzer in
+ * {@link #configure(Configuration)}; token streams are delegated to it.
+ *
+ * @author Nicolas Maisonneuve
+ */
+public abstract class ConfigurableAnalyzer extends Analyzer implements
+        LogEnabled {
+
+    /** The wrapped Lucene analyzer, set by the subclass when configured. */
+    protected Analyzer analyzer;
+
+    /** Logger received through {@link #enableLogging(Logger)}. */
+    protected Logger logger;
+
+    /** The analyzer manager component. */
+    protected AnalyzerManager analyzerM;
+
+    /** Source of the configuration file. */
+    private Source configFile;
+
+    /**
+     * Whether the configuration file should be checked for changes (to
+     * update the analyzer when it changes). Off by default.
+     */
+    private boolean checkConfigFile = false;
+
+    /**
+     * Configure this analyzer from the given configuration. Called by
+     * {@link #setConfigFile(Source)} and {@link #reconfigure()}.
+     *
+     * @param configuration
+     *            the configuration built from the configuration file
+     * @throws ConfigurationException
+     *             if the configuration is not usable
+     */
+    protected abstract void configure(Configuration configuration)
+            throws ConfigurationException;
+
+    /**
+     * Delegates to the wrapped analyzer.
+     *
+     * @see org.apache.lucene.analysis.Analyzer#tokenStream(java.lang.String,
+     *      java.io.Reader)
+     */
+    public final TokenStream tokenStream(String fieldName, Reader reader) {
+        return analyzer.tokenStream(fieldName, reader);
+    }
+
+    /**
+     * Stores the logger used by this analyzer.
+     *
+     * @see org.apache.avalon.framework.logger.LogEnabled#enableLogging(org.apache.avalon.framework.logger.Logger)
+     */
+    public void enableLogging(Logger log) {
+        logger = log;
+    }
+
+    /**
+     * Switch the configuration file check on or off. The flag is only
+     * stored here; presumably it is read when
+     * {@link org.apache.cocoon.components.search.components.AnalyzerManager#getAnalyzer(String)}
+     * is called - confirm in the manager implementation.
+     *
+     * @param check
+     *            true to have the configuration file checked for changes
+     */
+    public void setEnableCheckFile(boolean check) {
+        this.checkConfigFile = check;
+    }
+
+    /**
+     * @return true if the configuration file check is enabled
+     */
+    public boolean enableCheckFile() {
+        return this.checkConfigFile;
+    }
+
+    /**
+     * Reconfigure the analyzer if the configuration file is no longer
+     * reported as valid by <code>SourceHelper</code>.
+     *
+     * @throws ConfigurationException
+     *             if the new configuration cannot be built or applied
+     * @return boolean true if the analyzer was reconfigured (the file has
+     *         changed), else false
+     */
+    public boolean reconfigure() throws ConfigurationException {
+        final boolean unchanged = SourceHelper.checkSourceValidity(configFile);
+        if (unchanged) {
+            return false;
+        }
+
+        logger.info("reconfiguration of " + this.getClass().getName()
+                + " (the source " + configFile.getURI()
+                + " has changed...) ");
+        configure(SourceHelper.build(configFile));
+        return true;
+    }
+
+    /**
+     * Set the configuration file, register it with
+     * <code>SourceHelper</code> and configure the analyzer from it
+     * immediately.
+     *
+     * @param source
+     *            Source configuration file
+     * @throws ConfigurationException
+     *             if the configuration cannot be built or applied
+     */
+    public void setConfigFile(Source source) throws ConfigurationException {
+        this.configFile = source;
+        SourceHelper.registerSource(source);
+        final Configuration conf = SourceHelper.build(source);
+        configure(conf);
+    }
+
+    /**
+     * Set the analyzer manager. (The method name keeps its historical
+     * spelling because callers depend on it.)
+     *
+     * @param analyzerM
+     *            AnalyzerManager
+     */
+    public void setAnalyerManager(AnalyzerManager analyzerM) {
+        this.analyzerM = analyzerM;
+    }
+
+}
Added: lenya/trunk/src/java/org/apache/cocoon/components/search/analyzer/ConfigurablePerFieldAnalyzer.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/cocoon/components/search/analyzer/ConfigurablePerFieldAnalyzer.java?rev=345031&view=auto
==============================================================================
--- lenya/trunk/src/java/org/apache/cocoon/components/search/analyzer/ConfigurablePerFieldAnalyzer.java (added)
+++ lenya/trunk/src/java/org/apache/cocoon/components/search/analyzer/ConfigurablePerFieldAnalyzer.java Wed Nov 16 07:14:03 2005
@@ -0,0 +1,103 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cocoon.components.search.analyzer;
+
+import org.apache.avalon.framework.configuration.Configuration;
+import org.apache.avalon.framework.configuration.ConfigurationException;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.PerFieldAnalyzerWrapper;
+
+/**
+ * Configurable PerFieldAnalyzerWrapper. Allows one analyzer per field when
+ * indexing a document (useful for multilanguage documents). See
+ * {@link org.apache.lucene.analysis.PerFieldAnalyzerWrapper}.
+ *
+ * A config file for this analyzer looks like:
+ *
+ * <pre>
+ * &lt;!-- fields not listed below use the defaultAnalyzer --&gt;
+ * &lt;config defaultAnalyzer="analyzerEN"&gt;
+ *   &lt;fields&gt;
+ *     &lt;!-- the field "summary" is analyzed with "analyzerEN" --&gt;
+ *     &lt;field name="summary" analyzer="analyzerEN"/&gt;
+ *     &lt;field name="desc_fr" analyzer="analyzerFR"/&gt;
+ *     &lt;field name="desc_en" analyzer="analyzerEN"/&gt;
+ *     &lt;field name="desc_de" analyzer="analyzerDE"/&gt;
+ *   &lt;/fields&gt;
+ * &lt;/config&gt;
+ * </pre>
+ *
+ * @author Nicolas Maisonneuve
+ */
+public class ConfigurablePerFieldAnalyzer extends ConfigurableAnalyzer {
+
+    /** Root attribute naming the analyzer used for unlisted fields. */
+    public static final String CONFIG_DEFAULTANALYZER_ATTRIBUTE = "defaultAnalyzer";
+
+    /** Element wrapping the per-field entries. */
+    public static final String FIELDS_ELEMENT = "fields";
+
+    /** Element describing one field. */
+    public static final String FIELD_ELEMENT = "field";
+
+    /** Attribute holding the field name. */
+    public static final String FIELD_NAME_ATTRIBUTE = "name";
+
+    /** Attribute holding the analyzer ID of the field. */
+    public static final String FIELD_ANALYZERID_ATTRIBUTE = "analyzer";
+
+    /**
+     * Build a PerFieldAnalyzerWrapper from the configuration and install it
+     * as the wrapped analyzer. The wrapped analyzer is only replaced once
+     * the whole configuration has been read successfully.
+     *
+     * @throws ConfigurationException
+     *             if an attribute is missing or empty, or an analyzer ID is
+     *             unknown to the analyzer manager
+     * @see org.apache.cocoon.components.search.analyzer.ConfigurableAnalyzer#configure(org.apache.avalon.framework.configuration.Configuration)
+     */
+    public void configure(Configuration configuration)
+            throws ConfigurationException {
+
+        final String defaultId = configuration
+                .getAttribute(CONFIG_DEFAULTANALYZER_ATTRIBUTE);
+        final PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(
+                lookupAnalyzer(defaultId));
+
+        final Configuration[] fields = configuration.getChild(FIELDS_ELEMENT)
+                .getChildren(FIELD_ELEMENT);
+        for (int idx = 0; idx < fields.length; idx++) {
+            final Configuration field = fields[idx];
+
+            // read both attributes first, then validate them in this order
+            final String name = field.getAttribute(FIELD_NAME_ATTRIBUTE);
+            final String id = field.getAttribute(FIELD_ANALYZERID_ATTRIBUTE);
+            requireValue(name, FIELD_NAME_ATTRIBUTE);
+            requireValue(id, FIELD_ANALYZERID_ATTRIBUTE);
+
+            wrapper.addAnalyzer(name, lookupAnalyzer(id));
+        }
+        this.analyzer = wrapper;
+    }
+
+    /** Fetch an analyzer from the manager; an unknown ID is an error. */
+    private Analyzer lookupAnalyzer(String analyzerid)
+            throws ConfigurationException {
+        final Analyzer found = analyzerM.getAnalyzer(analyzerid);
+        if (found == null) {
+            throw new ConfigurationException("analyzer " + analyzerid
+                    + " doesn't exist");
+        }
+        return found;
+    }
+
+    /** Reject a null or empty attribute value of a field element. */
+    private static void requireValue(String value, String attribute)
+            throws ConfigurationException {
+        if (value == null || value.equals("")) {
+            throw new ConfigurationException("element " + FIELD_ELEMENT
+                    + " must have the " + attribute + " attribute");
+        }
+    }
+
+}
Added: lenya/trunk/src/java/org/apache/cocoon/components/search/analyzer/ConfigurableStopwordAnalyzer.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/cocoon/components/search/analyzer/ConfigurableStopwordAnalyzer.java?rev=345031&view=auto
==============================================================================
--- lenya/trunk/src/java/org/apache/cocoon/components/search/analyzer/ConfigurableStopwordAnalyzer.java (added)
+++ lenya/trunk/src/java/org/apache/cocoon/components/search/analyzer/ConfigurableStopwordAnalyzer.java Wed Nov 16 07:14:03 2005
@@ -0,0 +1,71 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cocoon.components.search.analyzer;
+
+import org.apache.avalon.framework.configuration.Configuration;
+import org.apache.avalon.framework.configuration.ConfigurationException;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+
+/**
+ * Configurable Stopword Analyzer: a Lucene StandardAnalyzer whose stop words
+ * come from the configuration file.
+ *
+ * Config file (the stop words are the <code>stopword</code> children of the
+ * root element; the root element name itself is not checked):
+ *
+ * <pre>
+ * &lt;stopwords&gt;
+ *   &lt;stopword&gt;a&lt;/stopword&gt;
+ *   &lt;stopword&gt;the&lt;/stopword&gt;
+ *   &lt;stopword&gt;but&lt;/stopword&gt;
+ * &lt;/stopwords&gt;
+ * </pre>
+ *
+ * NOTE(review): earlier documentation spelled the element
+ * <code>stopWord</code>, but the code has always read <code>stopword</code>
+ * - confirm which spelling deployed configuration files use.
+ *
+ * @author Nicolas Maisonneuve
+ */
+public class ConfigurableStopwordAnalyzer extends ConfigurableAnalyzer {
+
+    /** The element containing a stop word. */
+    private static final String STOP_WORD_ELEMENT = "stopword";
+
+    /**
+     * Configures the analyzer with the stop words found in the
+     * configuration.
+     *
+     * @param configuration
+     *            the element whose <code>stopword</code> children are read
+     * @throws ConfigurationException
+     *             if a stop word element has no value
+     */
+    public void configure(Configuration configuration)
+            throws ConfigurationException {
+        String[] words = stopTableBuilder(configuration);
+        // configure() can run before enableLogging() was called (for
+        // example directly from setConfigFile), so the logger may be unset
+        if (logger != null) {
+            logger.info("stop words number: " + words.length);
+        }
+        analyzer = new StandardAnalyzer(words);
+    }
+
+    /**
+     * Build the stop word table.
+     *
+     * @param conf
+     *            the element directly above the <code>stopword</code>
+     *            elements
+     * @throws ConfigurationException
+     *             if a stop word element has no value
+     * @return String[] array with all excluded words, empty if there are
+     *         none
+     */
+    static public String[] stopTableBuilder(Configuration conf)
+            throws ConfigurationException {
+
+        Configuration[] cStops = conf.getChildren(STOP_WORD_ELEMENT);
+        if (cStops == null) {
+            return new String[0];
+        }
+
+        final String[] words = new String[cStops.length];
+        for (int i = 0; i < cStops.length; i++) {
+            words[i] = cStops[i].getValue();
+        }
+        return words;
+    }
+
+}
Added: lenya/trunk/src/java/org/apache/cocoon/components/search/components/AnalyzerManager.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/cocoon/components/search/components/AnalyzerManager.java?rev=345031&view=auto
==============================================================================
--- lenya/trunk/src/java/org/apache/cocoon/components/search/components/AnalyzerManager.java (added)
+++ lenya/trunk/src/java/org/apache/cocoon/components/search/components/AnalyzerManager.java Wed Nov 16 07:14:03 2005
@@ -0,0 +1,75 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cocoon.components.search.components;
+
+import org.apache.avalon.framework.configuration.ConfigurationException;
+import org.apache.lucene.analysis.Analyzer;
+
+/**
+ * Analyzer Manager component: gives access to Lucene analyzers by ID.
+ *
+ * @author Maisonneuve Nicolas
+ */
+public interface AnalyzerManager {
+
+    /** Role name of this component (the fully qualified interface name). */
+    String ROLE = AnalyzerManager.class.getName();
+
+    /**
+     * Return the analyzer registered under the given ID.
+     *
+     * @param id
+     *            analyzer ID
+     * @return the analyzer; callers in this code base treat a null result
+     *         as "no analyzer with this ID"
+     * @throws ConfigurationException
+     *             declared for implementations that may have to
+     *             (re)configure the analyzer - the exact conditions are
+     *             implementation-defined
+     * @see org.apache.lucene.analysis.Analyzer
+     */
+    Analyzer getAnalyzer(String id) throws ConfigurationException;
+
+    /**
+     * Does an analyzer with this ID exist?
+     *
+     * @param id
+     *            String the analyzer id
+     * @return boolean true if the analyzer exists
+     */
+    boolean exist(String id);
+
+    /**
+     * Return all analyzer IDs.
+     *
+     * @return an array with the IDs of all analyzers
+     */
+    String[] getAnalyzersID();
+
+    /**
+     * Add a Lucene analyzer.
+     *
+     * @param id
+     *            the id of the analyzer
+     * @param analyzer
+     *            the analyzer to add
+     */
+    void put(String id, Analyzer analyzer);
+
+    /**
+     * Remove an analyzer.
+     *
+     * @param id
+     *            the analyzer ID
+     */
+    void remove(String id);
+}
Added: lenya/trunk/src/java/org/apache/cocoon/components/search/components/IndexManager.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/cocoon/components/search/components/IndexManager.java?rev=345031&view=auto
==============================================================================
--- lenya/trunk/src/java/org/apache/cocoon/components/search/components/IndexManager.java (added)
+++ lenya/trunk/src/java/org/apache/cocoon/components/search/components/IndexManager.java Wed Nov 16 07:14:03 2005
@@ -0,0 +1,69 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cocoon.components.search.components;
+
+import org.apache.cocoon.components.search.Index;
+import org.apache.cocoon.components.search.IndexException;
+
+/**
+ * Index Manager component: registers indexes and gives access to a specific
+ * index by ID.
+ *
+ * @author Maisonneuve Nicolas
+ */
+public interface IndexManager {
+
+    /** Role name of this component (the fully qualified interface name). */
+    String ROLE = IndexManager.class.getName();
+
+    /**
+     * Return all indexes.
+     *
+     * @return Array of indexes
+     * @throws IndexException
+     *             if the indexes cannot be provided
+     */
+    Index[] getIndex() throws IndexException;
+
+    /**
+     * Return the index with the given ID.
+     *
+     * @param id
+     *            the index ID
+     * @return the index, null if not found
+     * @throws IndexException
+     *             if the index cannot be provided
+     */
+    Index getIndex(String id) throws IndexException;
+
+    /**
+     * Add an index to the index manager.
+     *
+     * @param index
+     *            the index to register
+     */
+    void addIndex(Index index);
+
+    /**
+     * Remove an index.
+     *
+     * @param id
+     *            ID of the index
+     */
+    void remove(String id);
+
+    /**
+     * Check if the index exists.
+     *
+     * @param id
+     *            ID of the index
+     * @return true if the index exists
+     */
+    boolean contains(String id);
+}
Added: lenya/trunk/src/java/org/apache/cocoon/components/search/components/Indexer.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/cocoon/components/search/components/Indexer.java?rev=345031&view=auto
==============================================================================
--- lenya/trunk/src/java/org/apache/cocoon/components/search/components/Indexer.java (added)
+++ lenya/trunk/src/java/org/apache/cocoon/components/search/components/Indexer.java Wed Nov 16 07:14:03 2005
@@ -0,0 +1,100 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cocoon.components.search.components;
+
+import org.apache.cocoon.components.search.IndexException;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.store.Directory;
+
+public interface Indexer {
+
+ public static final String ROLE = Indexer.class.getName();
+
+ /**
+ * Name of the unique identifier field that all lucene documents must have
+ */
+ public static final String DOCUMENT_UID_FIELD = "uid";
+
+ /**
+ * Index a document: the document is added if {@link #clearIndex()} was
+ * called before, otherwise it is updated
+ *
+ * @param doc
+ * Document
+ * @throws IndexException
+ */
+ public void index(Document doc) throws IndexException;
+
+ /**
+ * Delete document
+ *
+ * @param uid
+ * the uid of the document
+ * @return int the number of deleted documents
+ * @throws IndexException
+ */
+ public int del(String uid) throws IndexException;
+
+ /**
+ * Optimize the index
+ *
+ * @throws IndexException
+ */
+ public void optimize() throws IndexException;
+
+ /**
+ * Set a lucene analyzer
+ *
+ * @param analyzer
+ * the analyzer
+ */
+ public void setAnalyzer(Analyzer analyzer);
+
+ /**
+ * Get the lucene analyzer
+ *
+ * @return the analyzer
+ */
+ public Analyzer getAnalyzer();
+
+ /**
+ * Set a merge factor value + set minMergeDocs=2*mergeFactor (see lucene
+ * docs)
+ *
+ * @param value
+ * the new merge factor
+ */
+ public void setMergeFactor(int value);
+
+ /**
+ * Get the merge factor
+ *
+ * @return the mergeFactor
+ */
+ public int getMergeFactor();
+
+ /**
+ * Clear the index
+ *
+ * @throws IndexException
+ */
+ public void clearIndex() throws IndexException;
+
+ /**
+ * Set the index directory
+ *
+ * @param directory
+ * the index directory
+ * @throws IndexException
+ */
+ public void setIndex(Directory directory) throws IndexException;
+
+ public Directory getIndex() throws IndexException;
+}
Added: lenya/trunk/src/java/org/apache/cocoon/components/search/components/Searcher.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/cocoon/components/search/components/Searcher.java?rev=345031&view=auto
==============================================================================
--- lenya/trunk/src/java/org/apache/cocoon/components/search/components/Searcher.java (added)
+++ lenya/trunk/src/java/org/apache/cocoon/components/search/components/Searcher.java Wed Nov 16 07:14:03 2005
@@ -0,0 +1,59 @@
+package org.apache.cocoon.components.search.components;
+
+import org.apache.cocoon.ProcessingException;
+import org.apache.lucene.search.Hits;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.store.Directory;
+
+/**
+ * This Searcher component allows to:
+ * <br/> - search in several indexes
+ * <br/> - sort the hits by a specified field
+ *
+ * @author Nicolas Maisonneuve
+ */
+public interface Searcher {
+/**
+ * The ROLE name of this avalon component.
+ * <p>
+ * Its value is the FQN of this interface,
+ * i.e. <code>org.apache.cocoon.components.search.components.Searcher</code>.
+ * </p>
+ */
+String ROLE = Searcher.class.getName();
+
+/**
+ * Add a lucene directory; several directories can be added.
+ * <p>
+ * The directory specifies the directory used for looking up the
+ * index. It defines the physical place of the index
+ * </p>
+ *
+ * @param directory The directory to add
+ */
+public void addDirectory(Directory directory);
+
+
+/**
+ * Sort the hits by a field
+ * @param field the index field
+ * @param reverse reverse order or not
+ */
+public void setSortField(String field, boolean reverse);
+
+
+/**
+ * Search using a Lucene Query object, returning zero or more hits.
+ *
+ * @param query A lucene query
+ * @return Hits zero or more hits matching the query
+ * @exception ProcessingException thrown on processing errors while
+ * looking up the index directory or generating the hits.
+ */
+public Hits search(Query query) throws ProcessingException;
+}
Added: lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/AbstractIndexer.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/AbstractIndexer.java?rev=345031&view=auto
==============================================================================
--- lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/AbstractIndexer.java (added)
+++ lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/AbstractIndexer.java Wed Nov 16 07:14:03 2005
@@ -0,0 +1,393 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cocoon.components.search.components.impl;
+
+import java.io.IOException;
+
+import org.apache.avalon.excalibur.pool.Recyclable;
+import org.apache.avalon.framework.logger.AbstractLogEnabled;
+import org.apache.cocoon.components.search.IndexException;
+import org.apache.cocoon.components.search.components.Indexer;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.store.Directory;
+
+/**
+ * Abstract Indexer
+ *
+ * @author Nicolas Maisonneuve
+ */
+public abstract class AbstractIndexer extends AbstractLogEnabled implements
+ Indexer, Recyclable {
+
+ /**
+ * the lucene Analyzer (see lucene doc)
+ */
+ protected Analyzer analyzer;
+
+ /**
+ * lucene Directory (see lucene doc)
+ */
+ protected Directory dir;
+
+ /**
+ * MergeFactor (see lucene doc)
+ */
+ protected int mergeFactor;
+
+ /**
+ * clear mode (if true the index will be cleared)
+ */
+ protected boolean clear_mode;
+
+ // runtime variables: lucene indexwriter and indexreader
+ protected IndexReader delete_reader;
+
+ protected IndexWriter add_writer;
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see org.apache.cocoon.components.search.components.Indexer#setMergeFactor(int)
+ */
+ public void setMergeFactor(int value) {
+ mergeFactor = value;
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see org.apache.cocoon.components.search.components.Indexer#getMergeFactor()
+ */
+ public int getMergeFactor() {
+ return mergeFactor;
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see org.apache.cocoon.components.search.components.Indexer#getIndex()
+ */
+ public Directory getIndex() {
+ return this.dir;
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see org.apache.cocoon.components.search.components.Indexer#setIndex(org.apache.lucene.store.Directory)
+ */
+ public void setIndex(Directory dir) throws IndexException {
+ if (dir == null) {
+ throw new IllegalArgumentException("set a null directory");
+ }
+ this.dir = dir;
+ clear_mode = false;
+ try {
+ IndexReader reader = IndexReader.open(dir);
+ reader.close();
+
+ } catch (IOException ioe) {
+ // couldn't open the index - so recreate it
+ if (getLogger().isWarnEnabled()) {
+ getLogger().warn("couldn't open the index - so recreate it");
+ }
+ this.clearIndex();
+ }
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see org.apache.cocoon.components.search.components.Indexer#setAnalyzer(org.apache.lucene.analysis.Analyzer)
+ */
+ public void setAnalyzer(Analyzer analyzer) {
+ if (analyzer == null) {
+ throw new IllegalArgumentException("set a null analyzer");
+ }
+ this.analyzer = analyzer;
+
+ if (this.getLogger().isDebugEnabled()) {
+ this.getLogger().debug(
+ "set the analyzer " + this.analyzer.getClass().getName());
+ }
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see org.apache.cocoon.components.search.components.Indexer#getAnalyzer()
+ */
+ public Analyzer getAnalyzer() {
+ return analyzer;
+ }
+
+ protected abstract void updateDocument(Document doc) throws IndexException;
+
+ protected abstract void addDocument(Document doc) throws IndexException;
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see org.apache.cocoon.components.search.components.Indexer#index(org.apache.lucene.document.Document)
+ */
+ public void index(Document doc) throws IndexException {
+ if (this.clear_mode) {
+ // As we know the index is empty , we just add the document
+ addDocument(doc);
+ } else {
+ updateDocument(doc);
+ }
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see org.apache.cocoon.components.search.components.Indexer#del(java.lang.String)
+ */
+ public int del(String uid) throws IndexException {
+ switchToDEL_MODE();
+ return deleteDocument(delete_reader, uid);
+ }
+
+ /**
+ * Delete document
+ *
+ * @param deleter
+ * the lucene indexreader to delete document
+ * @param uid
+ * the uid of the doucment to be deleted
+ * @return the number of deleted documents
+ * @throws IndexException
+ */
+ final protected int deleteDocument(IndexReader deleter, String uid)
+ throws IndexException {
+ int r = 0;
+ try {
+ r = deleter.delete(new Term(DOCUMENT_UID_FIELD, uid));
+ } catch (IOException ex) {
+ handleError("delete document (uid:" + uid + ") error", ex);
+ }
+ if (this.getLogger().isDebugEnabled()) {
+ this.getLogger().debug("document deleted (uid:" + uid + ")");
+ }
+ return r;
+ }
+
+ /**
+ * add document to the index
+ *
+ * @param writer
+ * the lucene indexwriter
+ * @param document
+ * the document to be indexed
+ * @throws IndexException
+ */
+ final protected void addDocument(IndexWriter writer, Document document)
+ throws IndexException {
+ try {
+ writer.addDocument(document, analyzer);
+ } catch (IOException ex) {
+ handleError("add document (uid:"
+ + document.get(DOCUMENT_UID_FIELD) + ") error", ex);
+ }
+ if (this.getLogger().isDebugEnabled()) {
+ this.getLogger().debug(
+ "document added (uid:" + document.get(DOCUMENT_UID_FIELD)
+ + ")");
+ }
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see org.apache.cocoon.components.search.components.Indexer#optimize()
+ */
+ public void optimize() throws IndexException {
+ // optimize index
+ try {
+ this.switchToADD_MODE(false);
+ add_writer.optimize();
+ } catch (IOException ex) {
+ throw new IndexException("optimization error", ex);
+ }
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see org.apache.cocoon.components.search.components.Indexer#clearIndex()
+ */
+ public void clearIndex() throws IndexException {
+ this.clear_mode = true;
+ this.switchToADD_MODE(true);
+ }
+
+ /**
+ * releasing resources
+ *
+ * @throws IndexException
+ */
+ protected void release() throws IndexException {
+ this.closeWriter();
+ this.closeReader();
+ // set default value
+ dir = null;
+ analyzer = null;
+ mergeFactor = 10;
+ }
+
+ /**
+ * recylcle this object
+ */
+ public void recycle() {
+ try {
+ release();
+ } catch (IndexException ex) {
+ this.getLogger().error("recycle error", ex);
+ }
+ }
+
+ /**
+ * Switch to write mode (close read, open writer ) if it's not already done
+ *
+ * @param clear
+ * clear index
+ * @throws IndexException
+ */
+ final protected void switchToADD_MODE(boolean clear) throws IndexException {
+ if (add_writer == null) {
+ closeReader();
+ openIndexWriter(clear);
+ }
+ }
+
+ /**
+ * Switch to del mode (close writer, open reader ) if it's not already done
+ *
+ * @throws IndexException
+ */
+ final protected void switchToDEL_MODE() throws IndexException {
+ if (delete_reader == null) {
+ closeWriter();
+ openIndexReader();
+ }
+ }
+
+ /**
+ * Open the index Writer
+ *
+ * @param create
+ * clear index or not
+ * @throws IndexException
+ */
+ final protected void openIndexWriter(boolean create) throws IndexException {
+
+ // now open writer
+ try {
+ add_writer = new IndexWriter(dir, analyzer, create);
+ // add_writer.setUseCompoundFile(true);
+ } catch (IOException e) {
+ throw new IndexException("open writer error", e);
+ }
+
+ if (mergeFactor > add_writer.mergeFactor) {
+ add_writer.minMergeDocs = mergeFactor * 2;
+ add_writer.mergeFactor = mergeFactor;
+ }
+
+ if (getLogger().isDebugEnabled()) {
+ getLogger().debug("writer is opened");
+ }
+ }
+
+ /**
+ * Open Index Reader
+ *
+ * @throws IndexException
+ */
+ final protected void openIndexReader() throws IndexException {
+ try {
+ this.delete_reader = IndexReader.open(dir);
+ } catch (IOException e) {
+ throw new IndexException("open reader error", e);
+ }
+ if (getLogger().isDebugEnabled()) {
+ getLogger().debug("reader is opened");
+ }
+
+ }
+
+ /**
+ * Close writer
+ *
+ * @throws IndexException
+ */
+ final protected void closeWriter() throws IndexException {
+ if (add_writer != null) {
+ try {
+ add_writer.close();
+ } catch (IOException ex) {
+ throw new IndexException("close writer error", ex);
+ } finally {
+ add_writer = null;
+ }
+ if (getLogger().isDebugEnabled()) {
+ getLogger().debug("writer is closed");
+ }
+ }
+ }
+
+ /**
+ * Close reader
+ *
+ * @throws IndexException
+ */
+ final protected void closeReader() throws IndexException {
+ if (this.delete_reader != null) {
+ try {
+ delete_reader.close();
+ } catch (IOException ex) {
+ handleError("close reader error", ex);
+ } finally {
+ delete_reader = null;
+ }
+ if (getLogger().isDebugEnabled()) {
+ getLogger().debug("reader is closed");
+ }
+ }
+ }
+
+ /**
+ * Handle error (close writer, reader,etc.. )
+ *
+ * @param message
+ * @param exception
+ * @throws IndexException
+ */
+ private void handleError(String message, Exception exception)
+ throws IndexException {
+ try {
+ release();
+ } catch (IndexException e) {
+ }
+ throw new IndexException(message, exception);
+ }
+
+}
Added: lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/AbstractSearcher.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/AbstractSearcher.java?rev=345031&view=auto
==============================================================================
--- lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/AbstractSearcher.java (added)
+++ lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/AbstractSearcher.java Wed Nov 16 07:14:03 2005
@@ -0,0 +1,122 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cocoon.components.search.components.impl;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.avalon.excalibur.pool.Recyclable;
+import org.apache.avalon.framework.activity.Disposable;
+import org.apache.avalon.framework.logger.AbstractLogEnabled;
+import org.apache.cocoon.ProcessingException;
+import org.apache.cocoon.components.search.components.Searcher;
+import org.apache.lucene.search.Hits;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.store.Directory;
+
+/**
+ * Base class for searchers: collects the lucene directories and the optional
+ * sort field, and delegates the creation of the lucene searcher to the
+ * subclass.
+ *
+ * @author Nicolas Maisonneuve
+ */
+abstract class AbstractSearcher extends AbstractLogEnabled implements Searcher,
+        Disposable, Recyclable {
+    /**
+     * Lucene directories to search in
+     */
+    protected List directories = new ArrayList();
+
+    /**
+     * Lucene SortField, null when the hits are not sorted by a field
+     */
+    protected SortField sortfield;
+
+    /**
+     * Lucene Searcher, set by {@link #getLuceneSearcher()}
+     */
+    protected org.apache.lucene.search.Searcher luceneSearcher;
+
+    /**
+     * @param directory
+     *            the directory to add, must not be null
+     * @see org.apache.cocoon.components.search.components.Searcher#addDirectory(org.apache.lucene.store.Directory)
+     */
+    public void addDirectory(Directory directory) {
+        if (directory == null) {
+            throw new IllegalArgumentException("add a null directory");
+        }
+        directories.add(directory);
+    }
+
+    /**
+     * @see org.apache.cocoon.components.search.components.Searcher#search(org.apache.lucene.search.Query)
+     */
+    public Hits search(Query query) throws ProcessingException {
+        try {
+            // NOTE(review): a searcher is requested for every call and the
+            // previous one is only closed by recycle() - confirm whether
+            // earlier Hits still need it before closing it here.
+            getLuceneSearcher();
+
+            if (sortfield == null) {
+                return luceneSearcher.search(query);
+            }
+            return luceneSearcher.search(query, new Sort(sortfield));
+        } catch (IOException e) {
+            throw new ProcessingException(e);
+        }
+    }
+
+    /**
+     * Create the lucene searcher for the current directories and store it in
+     * {@link #luceneSearcher}.
+     *
+     * @throws IOException
+     *             if the searcher cannot be created
+     */
+    protected abstract void getLuceneSearcher()
+            throws IOException;
+
+    /**
+     * @see org.apache.cocoon.components.search.components.Searcher#setSortField(java.lang.String,
+     *      boolean)
+     */
+    public void setSortField(String field, boolean reverse) {
+        sortfield = new SortField(field, reverse);
+    }
+
+    /**
+     * @see org.apache.avalon.framework.activity.Disposable#dispose()
+     */
+    public void dispose() {
+        recycle();
+    }
+
+    /**
+     * Forget directories and sort field and close the lucene searcher if one
+     * was created. Safe to call repeatedly and before any search.
+     *
+     * @see org.apache.avalon.excalibur.pool.Recyclable#recycle()
+     */
+    public void recycle() {
+        directories.clear();
+        sortfield = null;
+
+        // drop the reference first so that a second recycle()/dispose() does
+        // not close the same searcher again
+        org.apache.lucene.search.Searcher searcher = luceneSearcher;
+        luceneSearcher = null;
+        if (searcher != null) {
+            try {
+                searcher.close();
+            } catch (IOException ex) {
+                this.getLogger().error("release error", ex);
+            }
+        }
+    }
+
+}
Added: lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/AnalyzerManagerImpl.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/AnalyzerManagerImpl.java?rev=345031&view=auto
==============================================================================
--- lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/AnalyzerManagerImpl.java (added)
+++ lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/AnalyzerManagerImpl.java Wed Nov 16 07:14:03 2005
@@ -0,0 +1,229 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cocoon.components.search.components.impl;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.avalon.framework.configuration.Configurable;
+import org.apache.avalon.framework.configuration.Configuration;
+import org.apache.avalon.framework.configuration.ConfigurationException;
+import org.apache.avalon.framework.logger.AbstractLogEnabled;
+import org.apache.avalon.framework.logger.LogEnabled;
+import org.apache.avalon.framework.service.ServiceException;
+import org.apache.avalon.framework.service.ServiceManager;
+import org.apache.avalon.framework.service.Serviceable;
+import org.apache.avalon.framework.thread.ThreadSafe;
+import org.apache.cocoon.components.search.analyzer.ConfigurableAnalyzer;
+import org.apache.cocoon.components.search.components.AnalyzerManager;
+import org.apache.excalibur.source.Source;
+import org.apache.excalibur.source.SourceResolver;
+import org.apache.lucene.analysis.Analyzer;
+
+/**
+ * Implementation of the Analyzer Component: a registry of lucene analyzers
+ * by ID, filled from the component configuration.
+ *
+ * @author Maisonneuve Nicolas
+ * @version 1.0
+ */
+public class AnalyzerManagerImpl extends AbstractLogEnabled implements
+        AnalyzerManager, Serviceable, Configurable, ThreadSafe {
+
+    /**
+     * The analyzer element
+     */
+    public static final String ANALYZER_ELEMENT = "analyzer";
+
+    /**
+     * the id of the analyzer
+     */
+    public static final String ID_ATT = "id";
+
+    /**
+     * the analyzer class name
+     */
+    public static final String CLASSNAME_ATT = "class";
+
+    /**
+     * a file to configure the analyzer (required for a ConfigurableAnalyzer)
+     */
+    public static final String CONFIG_ATT = "configfile";
+
+    /**
+     * Automatic update or not the analyzer when the config file changes
+     */
+    public static final String CONFIGCHECK_ATT = "checkupdate";
+
+    /**
+     * Map of all the analyzers (ID, analyzer instance)
+     */
+    private Map analyzers = new HashMap();
+
+    private ServiceManager manager;
+
+    /**
+     * @param id
+     *            the analyzer ID
+     * @return true if an analyzer is registered under this ID
+     */
+    public boolean exist(String id) {
+        return this.analyzers.containsKey(id);
+    }
+
+    /**
+     * Create and register one analyzer per analyzer element.
+     *
+     * @param configuration
+     *            the component configuration
+     * @throws ConfigurationException
+     *             if there is no analyzer element, an attribute is missing,
+     *             an analyzer cannot be instantiated or its config file
+     *             cannot be resolved
+     */
+    public void configure(Configuration configuration)
+            throws ConfigurationException {
+        Configuration[] confAnalyzer = configuration
+                .getChildren(ANALYZER_ELEMENT);
+        if (confAnalyzer.length == 0) {
+            throw new ConfigurationException("tag " + ANALYZER_ELEMENT
+                    + " expected ");
+        }
+
+        SourceResolver resolver;
+        try {
+            resolver = (SourceResolver) manager.lookup(SourceResolver.ROLE);
+        } catch (ServiceException e) {
+            throw new ConfigurationException(" source resolver error", e);
+        }
+
+        try {
+            for (int i = 0; i < confAnalyzer.length; i++) {
+                configureAnalyzer(confAnalyzer[i], resolver);
+            }
+        } finally {
+            // give the resolver back even when an analyzer fails to configure
+            manager.release(resolver);
+        }
+        getLogger().info("AnalyzerManager configured.");
+    }
+
+    /**
+     * Create, set up and register the analyzer described by one analyzer
+     * element.
+     */
+    private void configureAnalyzer(Configuration conf, SourceResolver resolver)
+            throws ConfigurationException {
+        // KEY (a null default is used so that the message below is the one
+        // reported when the attribute is missing)
+        String key = conf.getAttribute(ID_ATT, null);
+        if (key == null) {
+            throw new ConfigurationException("element " + ANALYZER_ELEMENT
+                    + " must have a " + ID_ATT + " attribute");
+        }
+
+        // CLASS
+        String classname = conf.getAttribute(CLASSNAME_ATT, null);
+        if (classname == null) {
+            throw new ConfigurationException("element " + ANALYZER_ELEMENT
+                    + " must have a " + CLASSNAME_ATT + " attribute");
+        }
+        Analyzer analyzer = instantiate(key, classname);
+
+        if (analyzer instanceof LogEnabled) {
+            this.setupLogger(analyzer);
+        }
+
+        if (analyzer instanceof ConfigurableAnalyzer) {
+            ConfigurableAnalyzer confanalyzer = ((ConfigurableAnalyzer) analyzer);
+
+            // CONFIGFILE
+            String conffilename = conf.getAttribute(CONFIG_ATT, null);
+            if (conffilename == null || conffilename.equals("")) {
+                throw new ConfigurationException("the analyzer " + key
+                        + " must have a " + CONFIG_ATT + " attribute");
+            }
+
+            Source conffile;
+            try {
+                conffile = resolver.resolveURI(conffilename);
+            } catch (Exception ex1) {
+                throw new ConfigurationException(
+                        "Config file source error", ex1);
+            }
+
+            // CHECKUPDATE
+            boolean checkconfigfile = conf.getAttributeAsBoolean(
+                    CONFIGCHECK_ATT, false);
+
+            confanalyzer.setAnalyerManager(this);
+            confanalyzer.setConfigFile(conffile);
+            confanalyzer.setEnableCheckFile(checkconfigfile);
+        }
+        this.put(key, analyzer);
+    }
+
+    /**
+     * Instantiate the analyzer class through its no-argument constructor.
+     */
+    private Analyzer instantiate(String key, String classname)
+            throws ConfigurationException {
+        try {
+            return (Analyzer) Class.forName(classname).newInstance();
+        } catch (ClassNotFoundException ex) {
+            throw new ConfigurationException("analyzer class not found "
+                    + classname, ex);
+        } catch (Exception ex) {
+            throw new ConfigurationException("instanciation of " + key
+                    + " error", ex);
+        }
+    }
+
+    /**
+     * @param id
+     *            the analyzer ID
+     * @param analyzer
+     *            the analyzer, must not be null
+     * @see org.apache.cocoon.components.search.components.AnalyzerManager#put(java.lang.String,
+     *      org.apache.lucene.analysis.Analyzer)
+     */
+    public void put(String id, Analyzer analyzer) {
+        if (analyzer == null) {
+            // reject before touching the map so no null entry is left behind
+            throw new IllegalArgumentException("put a null analyzer (id: "
+                    + id + ")");
+        }
+        this.analyzers.put(id, analyzer);
+        this.getLogger().info(
+                "add analyzer id: " + id + " with class "
+                        + analyzer.getClass().getName());
+    }
+
+    /**
+     * @see org.apache.cocoon.components.search.components.AnalyzerManager#remove(java.lang.String)
+     */
+    public void remove(String id) {
+        this.analyzers.remove(id);
+        if (this.getLogger().isDebugEnabled()) {
+            this.getLogger().debug("remove analyzer id: " + id);
+        }
+    }
+
+    /**
+     * @return the IDs of all registered analyzers
+     * @see org.apache.cocoon.components.search.components.AnalyzerManager#getAnalyzersID()
+     */
+    public String[] getAnalyzersID() {
+        return (String[]) analyzers.keySet().toArray(
+                new String[analyzers.size()]);
+    }
+
+    /**
+     * Return the analyzer registered under the ID. A ConfigurableAnalyzer
+     * with file checking enabled is asked to reconfigure itself first.
+     *
+     * @param id
+     *            the analyzer ID
+     * @return the analyzer
+     * @throws ConfigurationException
+     *             if no analyzer is registered under this ID
+     * @see org.apache.cocoon.components.search.components.AnalyzerManager#getAnalyzer(java.lang.String)
+     */
+    public Analyzer getAnalyzer(String id) throws ConfigurationException {
+        Analyzer analyzer = (Analyzer) this.analyzers.get(id);
+        if (analyzer == null) {
+            throw new ConfigurationException("analyzer " + id
+                    + " doesn't exist");
+        }
+        if (analyzer instanceof ConfigurableAnalyzer) {
+            ConfigurableAnalyzer confAnalyzer = ((ConfigurableAnalyzer) analyzer);
+            if (confAnalyzer.enableCheckFile()) {
+                confAnalyzer.reconfigure();
+            }
+        }
+        return analyzer;
+    }
+
+    /**
+     * @see org.apache.avalon.framework.service.Serviceable#service(org.apache.avalon.framework.service.ServiceManager)
+     */
+    public void service(ServiceManager manager) throws ServiceException {
+        this.manager = manager;
+    }
+
+}
Added: lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/DefaultIndexerImpl.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/DefaultIndexerImpl.java?rev=345031&view=auto
==============================================================================
--- lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/DefaultIndexerImpl.java (added)
+++ lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/DefaultIndexerImpl.java Wed Nov 16 07:14:03 2005
@@ -0,0 +1,137 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cocoon.components.search.components.impl;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.avalon.framework.configuration.Configurable;
+import org.apache.avalon.framework.configuration.Configuration;
+import org.apache.avalon.framework.configuration.ConfigurationException;
+import org.apache.cocoon.components.search.IndexException;
+import org.apache.lucene.document.Document;
+
+/**
+ * Default indexer. In update mode the old version of a document is deleted
+ * immediately and the new version is buffered; buffered documents are
+ * written in one batch to avoid reopening the IndexWriter for each document.
+ *
+ * @author Nicolas Maisonneuve
+ */
+public class DefaultIndexerImpl extends AbstractIndexer implements Configurable {
+
+    /**
+     * Name of the configuration element holding the buffer size
+     */
+    static public final String DOCUMENT_BUFFERED_NUM_ELEMENT = "buffer_size";
+
+    /**
+     * the default size of the buffer
+     */
+    private int defaultMaxBufDocs = 100;
+
+    /**
+     * Buffer Size: the maximum number of documents buffered before they are
+     * flushed to the index (the buffer is used in the update mode)
+     */
+    private int bufferSize = defaultMaxBufDocs;
+
+    /**
+     * the buffer: the List where the documents are stored
+     */
+    private List buffer = new ArrayList();
+
+    /**
+     * Read the default buffer size (100 if not configured) and apply it.
+     *
+     * @see org.apache.avalon.framework.configuration.Configurable#configure(org.apache.avalon.framework.configuration.Configuration)
+     */
+    public void configure(Configuration conf) throws ConfigurationException {
+        defaultMaxBufDocs = conf.getChild(DOCUMENT_BUFFERED_NUM_ELEMENT)
+                .getValueAsInteger(100);
+        // the configured default is in effect from the first use on, not
+        // only after the first release()
+        bufferSize = defaultMaxBufDocs;
+        if (this.getLogger().isDebugEnabled()) {
+            this.getLogger().debug(
+                    "default max buffered documents: " + defaultMaxBufDocs);
+        }
+    }
+
+    /**
+     * Set the maximum number of buffered documents to avoid to open and close
+     * the IndexWriter a lot of times
+     *
+     * @param value
+     *            int number (default 100); with a value below 1 every
+     *            document is flushed immediately
+     */
+    public void setBufferSize(int value) {
+        bufferSize = value;
+    }
+
+    /**
+     * Flush the buffered documents, optimize the index and release the
+     * resources. The buffer is emptied and the parent resources are released
+     * even if flushing or optimizing fails. Nothing is flushed or optimized
+     * when no directory is set.
+     *
+     * @see org.apache.cocoon.components.search.components.impl.AbstractIndexer#release()
+     */
+    final protected void release() throws IndexException {
+        try {
+            if (dir != null) {
+                // flush the last documents to update
+                if (buffer.size() > 0) {
+                    flushBufferedDocs();
+                }
+                this.optimize();
+            }
+        } finally {
+            buffer.clear();
+            bufferSize = defaultMaxBufDocs;
+            super.release();
+        }
+    }
+
+    /**
+     * @see org.apache.cocoon.components.search.components.impl.AbstractIndexer#addDocument(org.apache.lucene.document.Document)
+     */
+    final protected void addDocument(Document doc) throws IndexException {
+        switchToADD_MODE(false);
+        addDocument(add_writer, doc);
+    }
+
+    /**
+     * Delete the old version of the document and buffer the new one; the
+     * buffer is flushed once it holds bufferSize documents.
+     *
+     * @see org.apache.cocoon.components.search.components.impl.AbstractIndexer#updateDocument(org.apache.lucene.document.Document)
+     */
+    final protected void updateDocument(Document doc) throws IndexException {
+        // first delete the old document
+        del(doc.get(DOCUMENT_UID_FIELD));
+
+        // then store in the index queue
+        buffer.add(doc);
+
+        // flush the queue if it's necessary (>= so that a size lowered below
+        // the current fill level still triggers the flush)
+        if (buffer.size() >= bufferSize) {
+            flushBufferedDocs();
+        }
+    }
+
+    /**
+     * Index the list of documents to update
+     *
+     * @throws IndexException
+     *             if a document cannot be added; the documents of the failed
+     *             batch are dropped
+     */
+    private void flushBufferedDocs() throws IndexException {
+        // Detach the batch first: a failing add releases the indexer, and
+        // release() must not flush the same documents again.
+        List pending = new ArrayList(buffer);
+        buffer.clear();
+
+        this.switchToADD_MODE(false);
+        Iterator iter = pending.iterator();
+        while (iter.hasNext()) {
+            addDocument(add_writer, (Document) iter.next());
+        }
+    }
+}
Added: lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/DefaultSearcherImpl.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/DefaultSearcherImpl.java?rev=345031&view=auto
==============================================================================
--- lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/DefaultSearcherImpl.java (added)
+++ lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/DefaultSearcherImpl.java Wed Nov 16 07:14:03 2005
@@ -0,0 +1,46 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cocoon.components.search.components.impl;
+
+import java.io.IOException;
+
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.MultiSearcher;
+import org.apache.lucene.store.Directory;
+
+/**
+ * Searcher using a lucene IndexSearcher for a single directory and a lucene
+ * MultiSearcher for several directories
+ *
+ * @author Nicolas Maisonneuve
+ */
+public class DefaultSearcherImpl extends AbstractSearcher {
+
+    /**
+     * Create the lucene searcher for the registered directories.
+     *
+     * @throws IOException
+     *             if no directory was added or an index cannot be opened
+     */
+    protected void getLuceneSearcher() throws IOException {
+        int count = directories.size();
+        if (count == 0) {
+            // reported as IOException so that search() wraps it like any
+            // other lookup failure
+            throw new IOException("no index directory to search in");
+        }
+        if (count == 1) {
+            luceneSearcher = new IndexSearcher((Directory) (directories
+                    .get(0)));
+            return;
+        }
+
+        IndexSearcher[] searchers = new IndexSearcher[count];
+        boolean built = false;
+        try {
+            for (int i = 0; i < count; i++) {
+                searchers[i] = new IndexSearcher((Directory) (directories
+                        .get(i)));
+            }
+            luceneSearcher = new MultiSearcher(searchers);
+            built = true;
+        } finally {
+            if (!built) {
+                closeOpened(searchers);
+            }
+        }
+    }
+
+    /**
+     * Close the searchers that were opened before a failure.
+     */
+    private void closeOpened(IndexSearcher[] searchers) {
+        for (int i = 0; i < searchers.length; i++) {
+            if (searchers[i] != null) {
+                try {
+                    searchers[i].close();
+                } catch (IOException ex) {
+                    // the failure that triggered the cleanup is the one
+                    // reported to the caller
+                    getLogger().warn("close searcher error", ex);
+                }
+            }
+        }
+    }
+
+}
Added: lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/IndexManagerImpl.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/IndexManagerImpl.java?rev=345031&view=auto
==============================================================================
--- lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/IndexManagerImpl.java (added)
+++ lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/IndexManagerImpl.java Wed Nov 16 07:14:03 2005
@@ -0,0 +1,355 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cocoon.components.search.components.impl;
+
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.avalon.framework.configuration.Configurable;
+import org.apache.avalon.framework.configuration.Configuration;
+import org.apache.avalon.framework.configuration.ConfigurationException;
+import org.apache.avalon.framework.logger.AbstractLogEnabled;
+import org.apache.avalon.framework.service.ServiceException;
+import org.apache.avalon.framework.service.ServiceManager;
+import org.apache.avalon.framework.service.Serviceable;
+import org.apache.avalon.framework.thread.ThreadSafe;
+import org.apache.cocoon.components.search.Index;
+import org.apache.cocoon.components.search.IndexException;
+import org.apache.cocoon.components.search.IndexStructure;
+import org.apache.cocoon.components.search.components.AnalyzerManager;
+import org.apache.cocoon.components.search.components.IndexManager;
+import org.apache.cocoon.components.search.fieldmodel.DateFieldDefinition;
+import org.apache.cocoon.components.search.fieldmodel.FieldDefinition;
+import org.apache.cocoon.components.search.utils.SourceHelper;
+import org.apache.excalibur.source.Source;
+
+//import org.apache.excalibur.source.SourceResolver;
+
+/**
+ * Index Manager Component. Configure and Manage the differents indexes.
+ *
+ * @author Maisonneuve Nicolas
+ * @version 1.0
+ */
+public class IndexManagerImpl extends AbstractLogEnabled implements
+ IndexManager, Serviceable, ThreadSafe, Configurable {
+
+
+ /**
+ * indexer element
+ */
+ public static final String INDEXER_ELEMENT = "indexer";
+
+ /**
+ * indexer element
+ */
+ public static final String INDEXER_ROLE_ATTRIBUTE = "role";
+
+ /**
+ * set of indexes
+ */
+ public static final String INDEXES_ELEMENT = "indexes";
+
+ /**
+ * Index declaration element
+ */
+ public static final String INDEX_ELEMENT = "index";
+
+ /**
+ * default analyzer of a index
+ */
+ public static final String INDEX_DEFAULTANALZER_ATTRIBUTE = "analyzer";
+
+ /**
+ * directory where the index is stored
+ */
+ public static final String INDEX_DIRECTORY_ATTRIBUTE = "directory";
+
+ /**
+ * Index Structure element
+ */
+ public static final String STRUCTURE_ELEMENT = "structure";
+
+ /**
+ * Field declaration element
+ */
+ public static final String FIELD_ELEMENT = "field";
+
+ /**
+ * field name
+ */
+ public static final String ID_ATTRIBUTE = "id";
+
+ /**
+ * type of the field: "text, "keyword", "date" (see
+ *
+ * @see org.apache.cocoon.components.search.fieldmodel.FieldDefinition
+ * class)
+ */
+ public static final String TYPE_ATTRIBUTE = "type";
+
+ /**
+ * store information or not (true/false)
+ */
+ public static final String STORE_ATTRIBUTE = "storetext";
+
+ /**
+ * The date Format when the field type is a date
+ */
+ public static final String DATEFORMAT_ATTRIBUTE = "dateformat";
+
+ /**
+ * check the config file each time the getIndex is called to update if
+ * necessary the configuration
+ */
+ // public static final String CHECK_ATTRIBUTE = "check";
+
+ /**
+ * Source of the index configuration file
+ */
+ // public static final String CONFIG_ATTRIBUTE = "config";
+
+ /**
+ * Check or not the configuration file (automatic update if the file is
+ * changed)
+ */
+ // private boolean check;
+
+ /**
+ * Index configuration file
+ */
+ // private Source configfile;
+
+ private ServiceManager manager;
+
+ private Map indexes;
+
+ public IndexManagerImpl() {
+ indexes = new HashMap();
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see org.apache.cocoon.components.search.components.IndexManager#contains(java.lang.String)
+ */
+ public boolean contains(String id) {
+ if (id != null) {
+ return this.indexes.get(id) != null;
+ }
+ return false;
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see org.apache.cocoon.components.search.components.IndexManager#getIndex(java.lang.String)
+ */
+ public Index getIndex(String id) throws IndexException {
+
+ if (id == null || id.equals("")) {
+ throw new IndexException(" index with no name was called");
+ }
+
+ // if (check) {
+ // // if the configuration file has changed , reload it
+ // if (!SourceHelper.checkSourceValidity(configfile)) {
+ // try {
+ // configureIndexManager(configfile);
+ // } catch (ConfigurationException e) {
+ // throw new IndexException(
+ // "Configuration Exception (index called: " + id, e);
+ // }
+ // this.getLogger().info(
+ // "Index Configuration file has changed. Index Configuration is
+ // reloading...");
+ // }
+ // }
+
+ Index index = (Index) this.indexes.get(id);
+ if (index == null) {
+ throw new IndexException("index " + id + " doesn't exist");
+ }
+
+ return index;
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see org.apache.cocoon.components.search.components.IndexManager#addIndex(org.apache.cocoon.components.search.Index)
+ */
+ public void addIndex(Index base) {
+ this.indexes.put(base.getID(), base);
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see org.apache.cocoon.components.search.components.IndexManager#remove(java.lang.String)
+ */
+ public void remove(String id) {
+ this.indexes.remove(id);
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see org.apache.avalon.framework.configuration.Configurable#configure(org.apache.avalon.framework.configuration.Configuration)
+ */
+ public void configure(Configuration configuration)
+ throws ConfigurationException {
+
+ // // update the index if the config file changes
+ // check = configuration.getAttributeAsBoolean(CHECK_ATTRIBUTE, false);
+ //
+ // // index config file
+ // String sourceURI = configuration.getAttribute(CONFIG_ATTRIBUTE);
+ //
+ // try {
+ // SourceResolver resolver = (SourceResolver) manager
+ // .lookup(SourceResolver.ROLE);
+ // configfile = resolver.resolveURI(sourceURI);
+ // if (check) {
+ // SourceHelper.registerSource(configfile);
+ // }
+ // manager.release(resolver);
+ // } catch (Exception ex1) {
+ // throw new ConfigurationException("get source " + sourceURI
+ // + " error", ex1);
+ // }
+ // configureIndexManager(configfile);
+ ConfigureIndexManager(configuration);
+ System.out.println("Search Engine - Index Manager configured.");
+ }
+
+ private void configureIndexManager(Source source)
+ throws ConfigurationException {
+ Configuration configuration = SourceHelper.build(source);
+ ConfigureIndexManager(configuration);
+ }
+
+ private void ConfigureIndexManager(Configuration configuration)
+ throws ConfigurationException {
+ AnalyzerManager analyzerM = null;
+ String indexerRole = configuration.getChild(INDEXER_ELEMENT).getAttribute(INDEXER_ROLE_ATTRIBUTE);
+ Configuration[] confs = configuration.getChild(INDEXES_ELEMENT).getChildren(INDEX_ELEMENT);
+
+ if (confs.length == 0) {
+ throw new ConfigurationException("no index is defined !");
+ }
+ try {
+ analyzerM = (AnalyzerManager) this.manager
+ .lookup(AnalyzerManager.ROLE);
+ } catch (ServiceException ex1) {
+ throw new ConfigurationException("AnalyzerManager lookup error",ex1);
+ }
+
+ // configure each index
+ for (int i = 0; i < confs.length; i++) {
+ String id = confs[i].getAttribute(ID_ATTRIBUTE);
+ String analyzerid = confs[i]
+ .getAttribute(INDEX_DEFAULTANALZER_ATTRIBUTE);
+ String directory = confs[i].getAttribute(INDEX_DIRECTORY_ATTRIBUTE);
+ if (!analyzerM.exist(analyzerid)) {
+ throw new ConfigurationException("Analyzer " + analyzerid
+ + " no found");
+ }
+
+ Configuration[] fields = confs[i].getChild(STRUCTURE_ELEMENT)
+ .getChildren(FIELD_ELEMENT);
+
+ IndexStructure docdecl = new IndexStructure();
+ for (int j = 0; j < fields.length; j++) {
+
+ FieldDefinition fielddecl;
+
+ // field id attribute
+ String id_field = fields[j].getAttribute(ID_ATTRIBUTE);
+
+ // field type attribute
+ String typeS = fields[j].getAttribute(TYPE_ATTRIBUTE, "");
+ int type = FieldDefinition.stringTotype(typeS);
+ try {
+ fielddecl = FieldDefinition.create(id_field, type);
+ } catch (IllegalArgumentException e) {
+ throw new ConfigurationException("field " + id_field
+ + " type " + typeS, e);
+ }
+
+ // field store attribute
+ boolean store;
+ if (fielddecl.getType() == FieldDefinition.TEXT) {
+ store = fields[j].getAttributeAsBoolean(STORE_ATTRIBUTE,
+ false);
+ } else {
+ store = fields[j].getAttributeAsBoolean(STORE_ATTRIBUTE,
+ true);
+ }
+ fielddecl.setStore(store);
+
+ // field dateformat attribute
+ if (fielddecl.getType() == FieldDefinition.DATE) {
+ String dateformat_field = fields[j]
+ .getAttribute(DATEFORMAT_ATTRIBUTE);
+ ((DateFieldDefinition) fielddecl)
+ .setDateFormat(new SimpleDateFormat(
+ dateformat_field));
+ }
+
+ this.getLogger().debug("field added: " + fielddecl);
+ docdecl.addFieldDef(fielddecl);
+ }
+ try {
+ Index index = new Index();
+ index.setID(id);
+ index.setIndexer(indexerRole);
+
+ if (index.setDirectory(directory)) {
+ this.getLogger().warn(
+ "directory " + directory + " was locked ");
+ }
+ index.setDefaultAnalyzerID(analyzerid);
+ index.setStructure(docdecl);
+ index.setManager(manager);
+
+ this.addIndex(index);
+ this.getLogger().info("add index " + index.getID());
+ } catch (IOException ex) {
+ throw new ConfigurationException(ex.getMessage(), ex);
+ }
+ this.manager.release(analyzerM);
+ }
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.cocoon.components.search.components.IndexManager#getIndex()
+ */
+ public Index[] getIndex() {
+ return (Index[]) this.indexes.values().toArray(
+ new Index[indexes.size()]);
+ }
+
+ /* (non-Javadoc)
+ * @see org.apache.avalon.framework.service.Serviceable#service(org.apache.avalon.framework.service.ServiceManager)
+ */
+ public void service(ServiceManager manager) throws ServiceException {
+ this.manager = manager;
+ }
+
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@lenya.apache.org
For additional commands, e-mail: commits-help@lenya.apache.org