You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lenya.apache.org by mi...@apache.org on 2005/11/16 16:14:18 UTC

svn commit: r345031 [2/2] - in /lenya/trunk/src/java/org/apache/cocoon: ./ components/ components/search/ components/search/analyzer/ components/search/components/ components/search/components/impl/ components/search/fieldmodel/ components/search/utils...

Added: lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/ParallelIndexerImpl.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/ParallelIndexerImpl.java?rev=345031&view=auto
==============================================================================
--- lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/ParallelIndexerImpl.java (added)
+++ lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/ParallelIndexerImpl.java Wed Nov 16 07:14:03 2005
@@ -0,0 +1,246 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cocoon.components.search.components.impl;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Stack;
+
+import org.apache.avalon.framework.context.Context;
+import org.apache.avalon.framework.context.ContextException;
+import org.apache.avalon.framework.context.Contextualizable;
+import org.apache.cocoon.Constants;
+import org.apache.cocoon.components.search.IndexException;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+
+/**
+ * Parallel Indexer Class: the documents are queued by the caller and written
+ * by several writer threads, each one in its own temporary sub-index. The
+ * sub-indexes are merged in the main index when the session is released.
+ * 
+ * @author Nicolas Maisonneuve
+ */
+public class ParallelIndexerImpl extends AbstractIndexer implements
+        Contextualizable {
+
+    /**
+     * documents waiting to be written (shared with the writer threads)
+     */
+    private Stack queue;
+
+    /**
+     * true when release() has asked the writer threads to finish (written by
+     * the indexing thread, read by the writer threads)
+     */
+    private volatile boolean releaseSession;
+
+    /**
+     * true while no writer thread is started for the current session
+     */
+    private boolean first_writing;
+
+    /**
+     * temp dir where the temporary sub-indexes are stored
+     */
+    private File tempDir;
+
+    /**
+     * multi-thread writer (its length is the number of threads)
+     */
+    private WriterThread[] writers;
+
+    public ParallelIndexerImpl() {
+        super();
+        this.queue = new Stack();
+
+        /**
+         * @TODO see how many processor there are automatically
+         */
+        this.setNumThread(2);
+        first_writing = true;
+    }
+
+    /**
+     * Set the number of writer threads. Must not be called during an indexing
+     * session: the running threads would be forgotten.
+     * 
+     * @param num
+     *            the number of threads, at least 1
+     * @throws IllegalArgumentException
+     *             if num is lower than 1 (no thread would ever write the
+     *             queued documents)
+     */
+    public void setNumThread(int num) {
+        if (num < 1) {
+            throw new IllegalArgumentException(
+                    "the number of threads must be at least 1: " + num);
+        }
+        writers = new WriterThread[num];
+    }
+
+    /*
+     * (non-Javadoc)
+     * 
+     * @see org.apache.avalon.framework.context.Contextualizable#contextualize(org.apache.avalon.framework.context.Context)
+     */
+    public void contextualize(Context context) throws ContextException {
+        tempDir = (File) context.get(Constants.CONTEXT_WORK_DIR);
+    }
+
+    /**
+     * End the indexing session: wait for the writer threads, merge their
+     * sub-indexes in the main index, then release the indexer.
+     * 
+     * @throws IndexException
+     *             if the wait is interrupted or if the merge fails
+     */
+    protected void release() throws IndexException {
+        // no document added in this session: no thread to wait for and
+        // nothing to merge (the writers array is empty or holds the threads
+        // of the previous session)
+        if (first_writing) {
+            super.release();
+            return;
+        }
+
+        // ok this is the end of indexation (information for the threads)
+        releaseSession = true;
+        try {
+            // join() returns only when the thread is really finished, i.e.
+            // after its writer is closed
+            for (int i = 0; i < writers.length; i++) {
+                writers[i].join();
+            }
+        } catch (InterruptedException ex) {
+            Thread.currentThread().interrupt();
+            throw new IndexException(
+                    "interrupted while waiting for the writer threads", ex);
+        }
+
+        // merge index
+        if (getLogger().isDebugEnabled()) {
+            getLogger().debug("Merging....");
+        }
+        try {
+            if (!queue.isEmpty()) {
+                // can only happen when no writer thread could work
+                getLogger().error(
+                        queue.size() + " documents not indexed: "
+                                + "no writer thread available");
+            }
+            this.switchToADD_MODE(false);
+            this.add_writer.addIndexes(openedDirectories());
+        } catch (IOException ex1) {
+            throw new IndexException("merge error ", ex1);
+        } finally {
+            // the next session always starts with new threads, new
+            // sub-indexes and an empty queue, even after a merge error
+            queue.clear();
+            releaseSession = false;
+            first_writing = true;
+        }
+        super.release();
+    }
+
+    /**
+     * @return the sub-index directories of the writers which could be opened
+     */
+    private Directory[] openedDirectories() {
+        int count = 0;
+        for (int i = 0; i < writers.length; i++) {
+            if (writers[i].dir != null) {
+                count++;
+            }
+        }
+        Directory[] dirs = new Directory[count];
+        for (int i = 0, j = 0; i < writers.length; i++) {
+            if (writers[i].dir != null) {
+                dirs[j++] = writers[i].dir;
+            }
+        }
+        return dirs;
+    }
+
+    /**
+     * Queue the document for the writer threads (started on first use)
+     */
+    final protected void addDocument(Document document) throws IndexException {
+        startThread();
+        // put the document in the queue
+        this.queue.add(document);
+    }
+
+    /**
+     * Delete the document having the same uid, then queue the new version
+     */
+    final protected void updateDocument(Document document)
+            throws IndexException {
+        del(document.get(DOCUMENT_UID_FIELD));
+        addDocument(document);
+    }
+
+    /**
+     * start the threads if it's not already done
+     * 
+     * @throws IndexException
+     */
+    private void startThread() throws IndexException {
+        if (first_writing) {
+            for (int i = 0; i < writers.length; i++) {
+                writers[i] = new WriterThread();
+                writers[i].start();
+            }
+            first_writing = false;
+        }
+    }
+
+    /**
+     * Writer Thread: writes the queued documents in its own sub-index
+     */
+    final class WriterThread extends Thread {
+        /**
+         * false as soon as the thread stops writing
+         */
+        volatile boolean alive = true;
+
+        private IndexWriter mywriter;
+
+        /**
+         * the sub-index directory, null if the writer could not be opened
+         */
+        Directory dir;
+
+        public void run() {
+            try {
+                if (openWriter()) {
+                    writeQueuedDocuments();
+                }
+            } finally {
+                // also reached when an unexpected error kills the thread
+                alive = false;
+                closeWriter();
+            }
+            if (getLogger().isDebugEnabled()) {
+                getLogger().debug(
+                        "WriterThread " + getName() + " is stoping...");
+            }
+        }
+
+        /**
+         * Create the temp directory and open the writer of the sub-index
+         * 
+         * @return false if the sub-index cannot be opened
+         */
+        private boolean openWriter() {
+            // create a temp directory to store a subindex
+            File file = new File(tempDir + File.separator + this.getName());
+            file.mkdirs();
+
+            // open a writer
+            try {
+                Directory directory = FSDirectory.getDirectory(file, true);
+                mywriter = new IndexWriter(directory, analyzer, true);
+                mywriter.mergeFactor = mergeFactor;
+                mywriter.minMergeDocs = mergeFactor * 2;
+                // published only when the sub-index is usable for the merge
+                dir = directory;
+            } catch (IOException e) {
+                getLogger().error("Thread " + getName() + ": opening error", e);
+                return false;
+            }
+
+            if (getLogger().isDebugEnabled()) {
+                getLogger().debug(
+                        "WriterThread " + this.getName() + " is ready....");
+            }
+            return true;
+        }
+
+        /**
+         * Write the queued documents until the session is released and the
+         * queue is empty
+         */
+        private void writeQueuedDocuments() {
+            while (alive) {
+                // read the flag BEFORE looking at the queue: release() sets
+                // it after the last document is queued, so an empty queue
+                // seen afterwards really is the end of the session
+                boolean ending = releaseSession;
+                Document doc = nextDocument();
+                if (doc != null) {
+                    try {
+                        // add document
+                        addDocument(mywriter, doc);
+                    } catch (IndexException ex) {
+                        getLogger().error(
+                                "Thread " + getName() + ": indexation error",
+                                ex);
+                    }
+                } else if (ending) {
+                    // stop thread (the writer is closed by run())
+                    alive = false;
+                } else {
+                    // wait new documents
+                    try {
+                        Thread.sleep(20);
+                    } catch (InterruptedException e2) {
+                        getLogger().error(
+                                "Thread " + getName() + ": sleep error", e2);
+                    }
+                }
+            }
+        }
+
+        /**
+         * @return the next queued document, or null if the queue is empty
+         */
+        private Document nextDocument() {
+            // test and pop in one step: another writer thread could take the
+            // last document between isEmpty() and pop()
+            synchronized (queue) {
+                if (queue.isEmpty()) {
+                    return null;
+                }
+                return (Document) queue.pop();
+            }
+        }
+
+        /**
+         * Close the writer of the sub-index, if it was opened
+         */
+        private void closeWriter() {
+            if (mywriter != null) {
+                try {
+                    mywriter.close();
+                } catch (IOException ex) {
+                    getLogger().error(
+                            "Thread " + getName() + ": close error", ex);
+                }
+            }
+        }
+    }
+}

Added: lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/ParallelSearcherImpl.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/ParallelSearcherImpl.java?rev=345031&view=auto
==============================================================================
--- lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/ParallelSearcherImpl.java (added)
+++ lenya/trunk/src/java/org/apache/cocoon/components/search/components/impl/ParallelSearcherImpl.java Wed Nov 16 07:14:03 2005
@@ -0,0 +1,48 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cocoon.components.search.components.impl;
+
+import java.io.IOException;
+
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.ParallelMultiSearcher;
+import org.apache.lucene.store.Directory;
+
+/**
+ * use lucene ParallelMultiSearcher Class 
+ * @author Nicolas Maisonneuve
+ */
+public class ParallelSearcherImpl extends AbstractSearcher {
+
+    /* (non-Javadoc)
+     * @see org.apache.cocoon.components.search.components.impl.AbstractSearcher#getLuceneSearcher()
+     */
+    protected void getLuceneSearcher() throws IOException {
+        if (directories.size() > 1) {
+                IndexSearcher[] searchers = new IndexSearcher[directories
+                        .size()];
+                for (int i = 0; i < searchers.length; i++) {
+                    searchers[i]= new IndexSearcher((Directory)(directories
+                        .get(i)));
+                }
+                luceneSearcher = new ParallelMultiSearcher(searchers);
+            } else {
+                luceneSearcher = new IndexSearcher((Directory) (directories
+                        .get(0)));
+            }
+    }
+
+}

Added: lenya/trunk/src/java/org/apache/cocoon/components/search/fieldmodel/DateFieldDefinition.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/cocoon/components/search/fieldmodel/DateFieldDefinition.java?rev=345031&view=auto
==============================================================================
--- lenya/trunk/src/java/org/apache/cocoon/components/search/fieldmodel/DateFieldDefinition.java (added)
+++ lenya/trunk/src/java/org/apache/cocoon/components/search/fieldmodel/DateFieldDefinition.java Wed Nov 16 07:14:03 2005
@@ -0,0 +1,94 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cocoon.components.search.fieldmodel;
+
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+
+import org.apache.lucene.document.DateField;
+import org.apache.lucene.document.Field;
+
+/**
+ * Field Definition for Date type
+ * 
+ * @author Nicolas Maisonneuve
+ */
+public class DateFieldDefinition extends FieldDefinition {
+
+    private SimpleDateFormat df;
+
+    /**
+     * @param name
+     *            name of the field
+     */
+    public DateFieldDefinition(String name) {
+        super(name, DATE);
+    }
+
+    /**
+     * Set the date format to parse string date in the
+     * 
+     * @see #createLField(String) method
+     * @param df
+     */
+    public void setDateFormat(SimpleDateFormat df) {
+        this.df = df;
+    }
+
+    /**
+     * @return the dateformat
+     */
+    public SimpleDateFormat getDateFormat() {
+        return df;
+    }
+
+    /**
+     * Create a Lucene Field
+     * 
+     * @param dateString
+     *            String date in string format
+     * @throws ParseException
+     * @return
+     * @see org.apache.lucene.document.Field
+     * 
+     */
+    public final Field createLField(String dateString)
+            throws IllegalArgumentException {
+        Date date = null;
+        try {
+            date = df.parse(dateString);
+        } catch (ParseException ex) {
+            throw new IllegalArgumentException(ex.getMessage());
+        }
+        return createLField(date);
+    }
+
+    /**
+     * Create Lucene Field
+     * 
+     * @param date
+     *            the date
+     * @throws ParseException
+     * @return
+     * @see org.apache.lucene.document.Field
+     * 
+     */
+    public final Field createLField(Date date) {
+        return new Field(name, DateField.dateToString(date), store, true, index);
+    }
+
+}

Added: lenya/trunk/src/java/org/apache/cocoon/components/search/fieldmodel/FieldDefinition.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/cocoon/components/search/fieldmodel/FieldDefinition.java?rev=345031&view=auto
==============================================================================
--- lenya/trunk/src/java/org/apache/cocoon/components/search/fieldmodel/FieldDefinition.java (added)
+++ lenya/trunk/src/java/org/apache/cocoon/components/search/fieldmodel/FieldDefinition.java Wed Nov 16 07:14:03 2005
@@ -0,0 +1,211 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cocoon.components.search.fieldmodel;
+
+import org.apache.lucene.document.Field;
+
+/**
+ * Field Definition class
+ * 
+ * @author Nicolas Maisonneuve
+ * 
+ */
+public abstract class FieldDefinition {
+    /**
+     * Text type
+     */
+    public static final int TEXT = 0;
+
+    /**
+     * Keyword type
+     */
+    public static final int KEYWORD = 1;
+
+    /**
+     * Date type
+     */
+    public static final int DATE = 2;
+
+    public static final String[] STRING_TYPE = { "text", "keyword", "date" };
+
+    /**
+     * Name of the field
+     */
+    protected String name;
+
+    /**
+     * type of the field (text, keyword, date)
+     */
+    protected int type;
+
+    /**
+     * Lucene Field specification
+     */
+    protected boolean store;
+
+    protected boolean index;
+
+    // futur lucene 1.9: protected Field.Store store;
+    // futur lucene 1.9: protected Field.Index index;
+
+    protected FieldDefinition(String name, String type)
+            throws IllegalArgumentException {
+        this(name, stringTotype(type));
+    }
+
+    protected FieldDefinition(String name, int type)
+            throws IllegalArgumentException {
+        this(name, type, false);
+    }
+
+    public static FieldDefinition create(String name, int type) {
+        FieldDefinition field = null;
+
+        if (name == null || name == "") {
+            throw new IllegalArgumentException("name cannot be empty");
+        }
+        switch (type) {
+        case TEXT:
+        case KEYWORD:
+            field = new StringFieldDefinition(name, type);
+            break;
+        case DATE:
+            field = new DateFieldDefinition(name);
+            break;
+        default:
+            throw new IllegalArgumentException("type not allowed");
+        }
+        return field;
+    }
+
+    /**
+     * 
+     * @param name
+     *            String field's name
+     * @param type
+     *            int indexation type
+     * @param store
+     *            boolean store value in the index
+     * @throws IllegalArgumentException
+     */
+    private FieldDefinition(String name, int type, boolean store)
+            throws IllegalArgumentException {
+
+        this.name = name.intern();
+        setType(type);
+        setStore(store);
+    }
+
+    public int hashCode() {
+        return name.hashCode() * this.type;
+    }
+
+    public void setStore(boolean store) {
+        // for futur lucene1.9
+        // this.store=(store)?Field.Store.YES:Field.Store.NO;
+        this.store = store;
+    }
+
+    public boolean getStore() {
+        // for futur lucene1.9 return this.store==Field.Store.YES;
+        return store;
+    }
+
+    public boolean equals(FieldDefinition fielddef) {
+        if (name == fielddef.name() && getType() == fielddef.getType()) {
+            return true;
+        } else {
+            return false;
+        }
+    }
+
+    public boolean equals(Object object) {
+        if (object instanceof FieldDefinition) {
+            return equals((FieldDefinition) object);
+        } else {
+            return false;
+        }
+    }
+
+    public String name() {
+        return name;
+    }
+
+    /**
+     * Create Lucene Field
+     * 
+     * @param value
+     *            String value to store in the lucene field
+     * @return Field
+     */
+    public abstract Field createLField(String value);
+
+    public int getType() {
+        return type;
+    }
+
+    /**
+     * Set the type of the FieldDefinition (DATE,TEXT,KEYWORD)
+     * 
+     * @param type
+     *            int
+     * @throws IllegalArgumentException
+     */
+    private void setType(int type) throws IllegalArgumentException {
+        switch (type) {
+        case FieldDefinition.TEXT:
+            index = true;
+            break;
+        case FieldDefinition.DATE:
+            index = true;
+            break;
+        case FieldDefinition.KEYWORD:
+            index = false;
+            break;
+        default:
+            throw new IllegalArgumentException("type not allowed");
+        }
+        this.type = type;
+    }
+
+    public final String toString() {
+        StringBuffer b = new StringBuffer();
+        b.append("name: " + name);
+        b.append(", type: " + FieldDefinition.STRING_TYPE[type]);
+        b.append(", store: " + getStore());
+        return b.toString();
+    }
+
+    /**
+     * Convert String to type
+     * 
+     * @param typename
+     *            String
+     * @throws Exception
+     * @return int
+     */
+    static final public int stringTotype(String typename)
+            throws IllegalArgumentException {
+        for (int i = 0; i < STRING_TYPE.length; i++) {
+            if (typename.toLowerCase().equals(STRING_TYPE[i])) {
+                return i;
+            }
+        }
+        throw new IllegalArgumentException("type " + typename
+                + " is not allowed");
+    }
+
+}

Added: lenya/trunk/src/java/org/apache/cocoon/components/search/fieldmodel/StringFieldDefinition.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/cocoon/components/search/fieldmodel/StringFieldDefinition.java?rev=345031&view=auto
==============================================================================
--- lenya/trunk/src/java/org/apache/cocoon/components/search/fieldmodel/StringFieldDefinition.java (added)
+++ lenya/trunk/src/java/org/apache/cocoon/components/search/fieldmodel/StringFieldDefinition.java Wed Nov 16 07:14:03 2005
@@ -0,0 +1,43 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cocoon.components.search.fieldmodel;
+
+import org.apache.lucene.document.Field;
+
+/**
+ * Field definition for the values handled as plain strings: it is the
+ * implementation used for the Text and Keyword types.
+ * 
+ * @author Nicolas Maisonneuve
+ */
+public final class StringFieldDefinition extends FieldDefinition {
+
+    /**
+     * @param name
+     *            name of the field
+     * @param type
+     *            type of the field, given as is to the FieldDefinition
+     *            constructor
+     */
+    public StringFieldDefinition(String name, int type) {
+        super(name, type);
+    }
+
+    /**
+     * Build the Lucene Field of this definition for a value
+     * 
+     * @param value
+     *            value to index
+     * @return a new Field made of the name, the value, the store flag, true
+     *         and the index flag of this definition
+     * @see org.apache.lucene.document.Field
+     */
+    public final Field createLField(String value) {
+        final Field field = new Field(this.name, value, this.store, true,
+                this.index);
+        return field;
+    }
+
+}

Added: lenya/trunk/src/java/org/apache/cocoon/components/search/lucene2.roles
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/cocoon/components/search/lucene2.roles?rev=345031&view=auto
==============================================================================
--- lenya/trunk/src/java/org/apache/cocoon/components/search/lucene2.roles (added)
+++ lenya/trunk/src/java/org/apache/cocoon/components/search/lucene2.roles Wed Nov 16 07:14:03 2005
@@ -0,0 +1,36 @@
+<?xml version="1.0"?>
+<!--
+  Copyright 1999-2004 The Apache Software Foundation
+
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<!-- $Id$ -->
+<role-list>
+
+    <!-- default indexer (Indexer/default role): default class DefaultIndexerImpl -->
+  <role name="org.apache.cocoon.components.search.components.Indexer/default" shorthand="default_indexer"
+default-class="org.apache.cocoon.components.search.components.impl.DefaultIndexerImpl"/>
+
+    <!-- parallel indexer (Indexer/parallel role): default class ParallelIndexerImpl -->
+  <role name="org.apache.cocoon.components.search.components.Indexer/parallel" shorthand="parallel_indexer"
+default-class="org.apache.cocoon.components.search.components.impl.ParallelIndexerImpl"/>
+
+	<!-- analyzer manager (AnalyzerManager role): default class AnalyzerManagerImpl -->
+  <role name="org.apache.cocoon.components.search.components.AnalyzerManager" shorthand="analyzer_manager"
+default-class="org.apache.cocoon.components.search.components.impl.AnalyzerManagerImpl"/>
+
+	<!-- index manager (IndexManager role): default class IndexManagerImpl -->
+  <role name="org.apache.cocoon.components.search.components.IndexManager" shorthand="index_manager"
+default-class="org.apache.cocoon.components.search.components.impl.IndexManagerImpl"/>
+
+</role-list>

Added: lenya/trunk/src/java/org/apache/cocoon/components/search/utils/SourceHelper.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/cocoon/components/search/utils/SourceHelper.java?rev=345031&view=auto
==============================================================================
--- lenya/trunk/src/java/org/apache/cocoon/components/search/utils/SourceHelper.java (added)
+++ lenya/trunk/src/java/org/apache/cocoon/components/search/utils/SourceHelper.java Wed Nov 16 07:14:03 2005
@@ -0,0 +1,89 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cocoon.components.search.utils;
+
+/**
+ * Utility class
+ * 
+ * @author Maisonneuve Nicolas
+ * 
+ */
+import java.io.IOException;
+import java.util.HashMap;
+
+import org.apache.avalon.framework.configuration.Configuration;
+import org.apache.avalon.framework.configuration.ConfigurationException;
+import org.apache.avalon.framework.configuration.DefaultConfigurationBuilder;
+import org.apache.excalibur.source.Source;
+import org.apache.excalibur.source.SourceValidity;
+import org.xml.sax.SAXException;
+
+/**
+ * Static helpers on the excalibur sources: registry of the source validities
+ * (to detect the modified sources) and configuration builder.
+ */
+public class SourceHelper {
+
+    /**
+     * registered sources (key) and their validity at registration time
+     * (value); shared by all the callers, always accessed under its own lock
+     */
+    static final private HashMap sources = new HashMap();
+
+    static final private DefaultConfigurationBuilder confBuilder = new DefaultConfigurationBuilder();
+
+    /**
+     * Register the current validity of the source as its reference validity.
+     * A source already registered keeps its first reference.
+     * 
+     * NOTE(review): the source itself is the key of the map, so the lookup
+     * depends on the equals/hashCode of the Source implementation - confirm
+     * that two Source objects of the same URI are equal.
+     */
+    static final public void registerSource(Source source) {
+        synchronized (sources) {
+            if (!sources.containsKey(source)) {
+                SourceValidity refValidity = source.getValidity();
+                sources.put(source, refValidity);
+            }
+        }
+    }
+
+    /**
+     * Check the validity of the source with the registered source
+     * 
+     * @return true if the source didn't changed, false if it changed, if it
+     *         was never registered or if its validity is unknown
+     */
+    static final public boolean checkSourceValidity(Source source) {
+        SourceValidity newValidity = source.getValidity();
+        SourceValidity refValidity;
+        synchronized (sources) {
+            refValidity = (SourceValidity) sources.get(source);
+        }
+        return checkSourceValidity(newValidity, refValidity);
+    }
+
+    /**
+     * Compare two sources
+     * 
+     * @param s1Validity
+     *            the new validity, may be null
+     * @param s2Validity
+     *            the reference validity, may be null
+     * @return true if the source didn't changed, i.e. if the reference
+     *         validity answers 1 by itself or, when it answers 0, for the new
+     *         validity; false if a needed validity is null
+     */
+    static final public boolean checkSourceValidity(SourceValidity s1Validity,
+            SourceValidity s2Validity) {
+
+        // no reference (source not registered or without validity): nothing
+        // proves that the source is unchanged
+        if (s2Validity == null) {
+            return false;
+        }
+        int valid = s2Validity.isValid();
+        if (valid == 0 && s1Validity != null) {
+            // the reference cannot answer alone: ask it about the new one
+            valid = s2Validity.isValid(s1Validity);
+        }
+        return valid == 1;
+    }
+
+    /**
+     * Build the configuration stored in a source
+     * 
+     * @param source
+     *            the source of the XML configuration
+     * @return the configuration
+     * @throws ConfigurationException
+     *             if the source cannot be read or parsed
+     */
+    static final public Configuration build(Source source)
+            throws ConfigurationException {
+        java.io.InputStream input = null;
+        try {
+            input = source.getInputStream();
+            // NOTE(review): the builder is shared by all the callers and is
+            // presumably not thread-safe, so only one build at a time
+            synchronized (confBuilder) {
+                return confBuilder.build(input);
+            }
+        } catch (IOException ex) {
+            throw new ConfigurationException("File " + source.getURI(), ex);
+        } catch (SAXException ex) {
+            throw new ConfigurationException(
+                    "SAX Error in the configuration File", ex);
+        } finally {
+            // the stream was never closed before
+            if (input != null) {
+                try {
+                    input.close();
+                } catch (IOException ignored) {
+                    // nothing more to do with a stream which cannot be closed
+                }
+            }
+        }
+    }
+
+}

Added: lenya/trunk/src/java/org/apache/cocoon/transformation/LuceneIndexTransformer2.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/cocoon/transformation/LuceneIndexTransformer2.java?rev=345031&view=auto
==============================================================================
--- lenya/trunk/src/java/org/apache/cocoon/transformation/LuceneIndexTransformer2.java (added)
+++ lenya/trunk/src/java/org/apache/cocoon/transformation/LuceneIndexTransformer2.java Wed Nov 16 07:14:03 2005
@@ -0,0 +1,607 @@
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cocoon.transformation;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.avalon.excalibur.pool.Recyclable;
+import org.apache.avalon.framework.configuration.ConfigurationException;
+import org.apache.avalon.framework.parameters.Parameters;
+import org.apache.avalon.framework.service.ServiceException;
+import org.apache.avalon.framework.service.ServiceManager;
+import org.apache.avalon.framework.service.Serviceable;
+import org.apache.cocoon.ProcessingException;
+import org.apache.cocoon.components.search.Index;
+import org.apache.cocoon.components.search.IndexException;
+import org.apache.cocoon.components.search.components.AnalyzerManager;
+import org.apache.cocoon.components.search.components.IndexManager;
+import org.apache.cocoon.components.search.components.Indexer;
+import org.apache.cocoon.environment.SourceResolver;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.AttributesImpl;
+
+/**
+ * Another Lucene index transformer.<br/> It supports:
+ * <ul>
+ * <li>index function (update indexing or add indexing if clear attribute is
+ * true)</li>
+ * <li>lucene field boosting</li>
+ * <li>delete function</li>
+ * </ul>
+ * 
+ * <p>
+ * This transformer uses several Avalon components, which can also be used
+ * separately:
+ * <ul>
+ * <li>AnalyzerManager: you can setup a analyzer (configurable) in the
+ * analyzer_manager tag in cocoon.xconf file</li>
+ * <li>IndexManager: you can setup a index in a the /WEB-INF/index.xml (default
+ * location , but you can specify the location in the IndexManager component
+ * configuration in cocoon.xconf file)</li>
+ * <li>Indexer (2 implementations: default (with update optimization) and
+ * parallel implementation for multiple CPUs)</li>
+ * </ul>
+ * </p>
+ * <p>
+ * <strong>Example of input source: </strong>
+ * </p>
+ * <p>
+ * <ul>
+ * <li>to Index <br>
+ * &lt;lucene:index xmlns:lucene=&quot;http://apache.org/cocoon/lucene/1.0&quot;
+ * <br/>indexid=&quot;myindex&quot; <br>
+ * clear="true" (optional attribute: clear index) <br/>mergefactor="100"&gt;
+ * (optional attribute: see Lucene doc) <br>
+ * <br/>&lt;lucene:document uid="http://myhost/myfile1.data"&gt; <br/>
+ * &lt;lucene:field name="tile" &gt; sqdqsdq &lt;/lucene:field&gt; <br>
+ * &lt;lucene:field name="description" &gt; a text bla bal blalael
+ * balbal&lt;/lucene:field&gt; <br>
+ * &lt;lucene:field name="date" &gt;10/12/2002&lt;/lucene:field&gt; <br/>
+ * &lt;/lucene:document&gt; <br>
+ * 
+ * <p>
+ * &lt;lucene:document uid="http://myhost/myfile2.data" &gt; <br>
+ * &lt;lucene:field name="author" boost="2" &gt;Mr Author &lt;/lucene:field&gt;
+ * <em>(boost the field for the search (see Lucene documentation))</em> <br/>
+ * &lt;lucene:field name="langage" &gt;french&lt;/lucene:field&gt; <br>
+ * &lt;/lucene:document&gt; <br>
+ * &lt; /lucene:index&gt;
+ * </p>
+ * </li>
+ * 
+ * <li>To delete <br/>
+ * <p>
+ * &lt;lucene:delete indexid="myindex" &gt; <br>
+ * &lt;lucene:document uid="http://myhost/myfile.data" /&gt; <br>
+ * &lt;lucene:document uid="EODOED-EFE" /&gt; <br>
+ * &lt;/lucene:delete&gt;
+ * </p>
+ * 
+ * <p>
+ * <strong>Example of Output Source </strong>
+ * </p>
+ * <p>
+ * &lt;page xmlns:lucene=&quot;http://apache.org/cocoon/lucene/1.0&quot;&gt;
+ * <br>
+ * &lt; lucene:index &gt; <br>
+ * &lt;lucene:document uid="http://myhost/myfile1.data"/&gt; <br/>
+ * &lt;lucene:document uid="http://myhost/myfile2.data"/&gt; <br/>
+ * &lt;/lucene:index&gt;
+ * </p>
+ * <p>
+ * &lt;lucene:delete &gt; &lt;lucene:document
+ * uid="http://myhost/myfile1.data"/&gt; <br/>&lt;lucene:document
+ * uid="EODOED-EFE"/&gt; <br/>&lt;/lucene:delete &gt;</br></li>
+ * </ul>
+ * 
+ * @author Nicolas Maisonneuve
+ */
+
+public class LuceneIndexTransformer2 extends AbstractTransformer implements
+        Recyclable, Serviceable {
+
+    /**
+     * Default index directory name. Not referenced anywhere else in this
+     * class.
+     */
+    public static final String DIRECTORY_DEFAULT = "index";
+
+    /**
+     * Namespace URI of the elements this transformer reacts to; elements in
+     * any other namespace are passed through unchanged.
+     */
+    public static final String LUCENE_URI = "http://apache.org/cocoon/lucene/1.0";
+
+    /**
+     * Namespace prefix, only used when building error messages. (The constant
+     * name is a misspelling of "PREFIX".)
+     */
+    public static final String LUCENE_PREXIF = "lucene";
+
+    /**
+     * Action element: index documents
+     */
+    public static final String LUCENE_INDEXING_ELEMENT = "index";
+
+    /**
+     * Action element: delete documents
+     */
+    public static final String LUCENE_DELETING_ELEMENT = "delete";
+
+    /**
+     * Attribute of the action elements: identity of the index (see the index
+     * definition file)
+     */
+    public static final String LUCENE_INDEXING_INDEXID_ATTRIBUTE = "indexid";
+
+    /**
+     * Optional attribute: clear the index before processing. Only the value
+     * "true" (in any letter case) clears the index; when the attribute is
+     * absent the index is not cleared.
+     */
+    public static final String LUCENE_INDEXING_CREATE_ATTRIBUTE = "clear";
+
+    /**
+     * Optional attribute: analyzer identity (see the AnalyzerManager
+     * component). When absent, the default analyzer id of the index is added
+     * to the attributes of the emitted action element.
+     */
+    public static final String LUCENE_INDEXING_ANALYZER_ATTRIBUTE = "analyzer";
+
+    /**
+     * Optional attribute: merge factor handed to the indexer (see the Lucene
+     * docs). When absent, the merge factor of the indexer is not touched.
+     */
+    public static final String LUCENE_INDEXING_MERGE_FACTOR_ATTRIBUTE = "mergefactor";
+
+    /**
+     * Lucene document element
+     */
+    public static final String LUCENE_DOCUMENT_ELEMENT = "document";
+
+    /**
+     * Mandatory attribute of the document element: unique id of the document
+     */
+    public static final String LUCENE_DOCUMENT_UID_ATTRIBUTE = "uid";
+
+    /**
+     * Lucene field element
+     */
+    public static final String LUCENE_FIELD_ELEMENT = "field";
+
+    /**
+     * Mandatory, non-empty attribute of the field element: name of the field
+     */
+    public static final String LUCENE_FIELD_NAME_ATTRIBUTE = "name";
+
+    /**
+     * Optional attribute of the field element: field boost (see the Lucene
+     * docs). When absent, a boost of 1.0 is used.
+     */
+    public static final String LUCENE_FIELD_BOOST_ATTRIBUTE = "boost";
+
+    // Current state of the SAX state machine: one of the six constants below
+    private int processing;
+
+    // Outside of any lucene:index / lucene:delete element
+    public static final int NO_PROCESSING = 0;
+
+    // Inside lucene:index, waiting for a lucene:document
+    public static final int INDEX_PROCESS = 1;
+
+    // Inside a lucene:document of an index action, waiting for a lucene:field
+    public static final int IN_DOCUMENT_PROCESS = 2;
+
+    // Inside a lucene:field; character data is collected as the field value
+    public static final int IN_FIELD_PROCESS = 4;
+
+    // Inside lucene:delete, waiting for a lucene:document
+    public static final int DELETE_PROCESS = 5;
+
+    // Inside a lucene:document of a delete action
+    public static final int DELETING_PROCESS = 6;
+
+    // Runtime variables
+
+    // Never read or written in this class
+    private int mergeFactor;
+
+    // Attributes written to the emitted lucene elements
+    private AttributesImpl attrs = new AttributesImpl();
+
+    // Index selected by the indexid attribute of the current action element
+    private Index index;
+
+    // Indexer obtained from the index; handed back in closeIndexer()
+    private Indexer indexer;
+
+    // Service manager used to look up the IndexManager and AnalyzerManager
+    private ServiceManager manager;
+
+    // Lucene document being built for the current lucene:document element
+    private Document bodyDocument;
+
+    // uid attribute of the current lucene:document element
+    private String uid;
+
+    // name attribute of the current lucene:field element
+    private String fieldname;
+
+    // boost of the current lucene:field element
+    private float fieldboost;
+
+    // Character data collected for the current lucene:field element
+    private StringBuffer fieldvalue;
+
+    /**
+     * Sets up the transformer for a pipeline run.
+     * <p>
+     * This implementation is empty: none of the arguments is used. The index
+     * to work on is selected later from the attributes of the action elements
+     * found in the input.
+     * </p>
+     */
+    public void setup(SourceResolver resolver, Map objectModel, String src,
+            Parameters parameters) throws ProcessingException, SAXException,
+            IOException {
+    }
+
+    /**
+     * Resets this transformer so that the instance can be reused.
+     * <p>
+     * The state machine returns to {@link #NO_PROCESSING}, the per-document
+     * state of the last run is dropped, and the superclass is recycled as
+     * well.
+     * </p>
+     */
+    public void recycle() {
+        this.processing = NO_PROCESSING;
+        // drop what is left over from the last document/field, so that an
+        // aborted run cannot leak into the next one
+        this.bodyDocument = null;
+        this.uid = null;
+        this.fieldname = null;
+        this.fieldvalue = null;
+        super.recycle();
+    }
+
+    /**
+     * Stores the service manager; it is used later to look up the index and
+     * analyzer managers.
+     * 
+     * @param manager
+     *            the service manager
+     */
+    public void service(ServiceManager manager) throws ServiceException {
+        this.manager = manager;
+    }
+
+    /**
+     * Forwards the start of the document to the superclass unchanged.
+     */
+    public void startDocument() throws SAXException {
+        super.startDocument();
+    }
+
+    /**
+     * Forwards the end of the document to the superclass unchanged.
+     */
+    public void endDocument() throws SAXException {
+        super.endDocument();
+    }
+
+    /**
+     * Begin the scope of a prefix-URI Namespace mapping. The event is only
+     * forwarded while no index or delete action is being processed.
+     * 
+     * @param prefix
+     *            The Namespace prefix being declared.
+     * @param uri
+     *            The Namespace URI the prefix is mapped to.
+     */
+    public void startPrefixMapping(String prefix, String uri)
+            throws SAXException {
+        if (processing != NO_PROCESSING) {
+            // inside an action element: the mapping is not forwarded
+            return;
+        }
+        super.startPrefixMapping(prefix, uri);
+    }
+
+    /**
+     * End the scope of a prefix-URI mapping. The event is only forwarded
+     * while no index or delete action is being processed.
+     * 
+     * @param prefix
+     *            The prefix that was being mapped.
+     */
+    public void endPrefixMapping(String prefix) throws SAXException {
+        if (processing != NO_PROCESSING) {
+            // inside an action element: the event is not forwarded
+            return;
+        }
+        super.endPrefixMapping(prefix);
+    }
+
+    /**
+     * Handles an opening element.
+     * <p>
+     * Elements outside the Lucene namespace are forwarded unchanged. Lucene
+     * elements drive the state machine:
+     * </p>
+     * <ul>
+     * <li>lucene:index / lucene:delete open an action: the indexer is
+     * initialised and the element is forwarded with the attributes prepared
+     * by <code>initIndexer</code></li>
+     * <li>lucene:document inside an action starts a document; it needs a uid
+     * attribute and is not forwarded here</li>
+     * <li>lucene:field inside a document of an index action starts a field;
+     * it needs a non-empty name attribute and may carry a boost</li>
+     * </ul>
+     * <p>
+     * Lucene elements opened inside a field, or inside a document of a delete
+     * action, match no case and are dropped without being forwarded.
+     * </p>
+     * 
+     * @throws SAXException
+     *             if an element is not allowed in the current state or one
+     *             of its attributes is missing or malformed
+     */
+    public void startElement(String namespaceURI, String localName,
+            String qName, Attributes atts) throws SAXException {
+
+        if (!LUCENE_URI.equals(namespaceURI)) {
+            // bypass
+            super.startElement(namespaceURI, localName, qName, atts);
+            return;
+        }
+
+        switch (processing) {
+
+        case NO_PROCESSING:
+
+            // index action
+            if (LUCENE_INDEXING_ELEMENT.equals(localName)) {
+                this.initIndexer(atts);
+                processing = INDEX_PROCESS;
+                super.startElement(namespaceURI, localName, qName, attrs);
+            }
+            // delete action
+            else if (LUCENE_DELETING_ELEMENT.equals(localName)) {
+                this.initIndexer(atts);
+                processing = DELETE_PROCESS;
+                super.startElement(namespaceURI, localName, qName, attrs);
+            } else {
+                handleError("element " + localName + " unknown ");
+            }
+            break;
+
+        case INDEX_PROCESS:
+
+            // new document to index
+            if (LUCENE_DOCUMENT_ELEMENT.equals(localName)) {
+
+                uid = atts.getValue(LUCENE_DOCUMENT_UID_ATTRIBUTE);
+                if (uid == null) {
+                    handleError("<" + LUCENE_PREXIF + ":"
+                            + LUCENE_DOCUMENT_ELEMENT
+                            + "> element must contain "
+                            + LUCENE_DOCUMENT_UID_ATTRIBUTE + " attribute");
+                }
+                bodyDocument = index.createDocument(uid);
+                processing = IN_DOCUMENT_PROCESS;
+            } else {
+                // the enclosing element in this state is lucene:index
+                handleError("element " + localName
+                        + " is not allowed in  <" + LUCENE_PREXIF + ":"
+                        + LUCENE_INDEXING_ELEMENT + "> element ");
+            }
+            break;
+
+        case DELETE_PROCESS:
+
+            if (LUCENE_DOCUMENT_ELEMENT.equals(localName)) {
+                uid = atts.getValue(LUCENE_DOCUMENT_UID_ATTRIBUTE);
+                if (uid == null) {
+                    handleError("<" + LUCENE_PREXIF + ":"
+                            + LUCENE_DOCUMENT_ELEMENT
+                            + "> element must contain "
+                            + LUCENE_DOCUMENT_UID_ATTRIBUTE + " attribute");
+                }
+                processing = DELETING_PROCESS;
+            } else {
+                handleError("element " + localName
+                        + " is not a <lucene:document> element ");
+            }
+            break;
+
+        case IN_DOCUMENT_PROCESS:
+            if (LUCENE_FIELD_ELEMENT.equals(localName)) {
+
+                // set the field name
+                this.fieldname = atts.getValue(LUCENE_FIELD_NAME_ATTRIBUTE);
+                if (this.fieldname == null || this.fieldname.equals("")) {
+                    handleError("<lucene:field> element must contain name attribut");
+                }
+
+                // clear the text buffer
+                this.fieldvalue = new StringBuffer();
+
+                // set boost value
+                String fieldboostS = atts
+                        .getValue(LUCENE_FIELD_BOOST_ATTRIBUTE);
+                if (fieldboostS == null) {
+                    fieldboost = 1.0f;
+                } else {
+                    try {
+                        fieldboost = Float.parseFloat(fieldboostS);
+                    } catch (NumberFormatException ex) {
+                        // go through handleError so that the indexer is
+                        // released before the failure is reported
+                        handleError("<" + LUCENE_PREXIF + ":"
+                                + LUCENE_FIELD_ELEMENT + "> attribute "
+                                + LUCENE_FIELD_BOOST_ATTRIBUTE
+                                + " is not a number: " + fieldboostS, ex);
+                    }
+                }
+                processing = IN_FIELD_PROCESS;
+            } else {
+                handleError("<" + LUCENE_PREXIF + ":"
+                        + LUCENE_FIELD_ELEMENT + "> was expected!");
+            }
+            break;
+        }
+    }
+
+    /**
+     * Handles a closing element.
+     * <p>
+     * Elements outside the Lucene namespace are forwarded unchanged. For
+     * Lucene elements the closing tag must match the current state:
+     * </p>
+     * <ul>
+     * <li>end of lucene:index / lucene:delete: the indexer is closed and the
+     * element end is forwarded</li>
+     * <li>end of lucene:document in an index action: the document is indexed
+     * and an empty lucene:document element carrying only the uid is emitted</li>
+     * <li>end of lucene:document in a delete action: the document is deleted
+     * and an empty lucene:document element carrying only the uid is emitted</li>
+     * <li>end of lucene:field: the collected text becomes a field of the
+     * current document</li>
+     * </ul>
+     * 
+     * @throws SAXException
+     *             if the closing element does not match the current state or
+     *             the index operation fails
+     */
+    public void endElement(String namespaceURI, String localName, String qName)
+            throws SAXException {
+
+        if (!LUCENE_URI.equals(namespaceURI)) {
+            super.endElement(namespaceURI, localName, qName);
+            return;
+        }
+
+        switch (processing) {
+
+        case INDEX_PROCESS:
+            if (LUCENE_INDEXING_ELEMENT.equals(localName)) {
+                // end of the indexing -> close the indexer
+                this.closeIndexer();
+                this.processing = NO_PROCESSING;
+                super.endElement(namespaceURI, localName, qName);
+            } else {
+                handleError("</lucene:" + LUCENE_INDEXING_ELEMENT
+                        + "> was expected!");
+            }
+            break;
+
+        case DELETE_PROCESS:
+            if (LUCENE_DELETING_ELEMENT.equals(localName)) {
+                // end of the deleting -> close the indexer
+                this.closeIndexer();
+                this.processing = NO_PROCESSING;
+                super.endElement(namespaceURI, localName, qName);
+            } else {
+                handleError("</lucene:" + LUCENE_DELETING_ELEMENT
+                        + "> was expected!");
+            }
+            break;
+
+        case IN_DOCUMENT_PROCESS:
+            if (LUCENE_DOCUMENT_ELEMENT.equals(localName)) {
+                // index the document
+                try {
+                    this.indexer.index(bodyDocument);
+                } catch (IndexException ex1) {
+                    handleError(ex1);
+                }
+                if (this.getLogger().isDebugEnabled()) {
+                    this.getLogger().debug(
+                            " lucene document: " + this.bodyDocument);
+                }
+                bodyDocument = null;
+                // report the processed document: empty element with its uid
+                attrs.clear();
+                attrs.addAttribute(namespaceURI, "uid", "uid", "CDATA", uid);
+                super.startElement(namespaceURI, localName, qName, attrs);
+                super.endElement(namespaceURI, localName, qName);
+                this.processing = INDEX_PROCESS;
+            } else {
+                handleError("</lucene:" + LUCENE_DOCUMENT_ELEMENT
+                        + "> was expected!");
+            }
+            break;
+
+        case DELETING_PROCESS:
+            if (LUCENE_DOCUMENT_ELEMENT.equals(localName)) {
+                // delete a document
+                try {
+                    indexer.del(uid);
+                } catch (IndexException ex2) {
+                    handleError(ex2);
+                }
+                // report the processed document: empty element with its uid
+                attrs.clear();
+                attrs.addAttribute(namespaceURI, "uid", "uid", "CDATA", uid);
+                super.startElement(namespaceURI, localName, qName, attrs);
+                super.endElement(namespaceURI, localName, qName);
+                this.processing = DELETE_PROCESS;
+            } else {
+                handleError("</lucene:" + LUCENE_DOCUMENT_ELEMENT
+                        + "> was expected!");
+            }
+            break;
+
+        case IN_FIELD_PROCESS:
+            if (LUCENE_FIELD_ELEMENT.equals(localName)) {
+
+                // create lucene field
+                Field f = null;
+                try {
+                    f = index.createField(fieldname, fieldvalue.toString());
+                } catch (IndexException ex) {
+                    // always throws, so f is assigned whenever the next
+                    // statement is reached
+                    handleError(ex);
+                }
+                f.setBoost(fieldboost);
+
+                // add field to the lucene document
+                bodyDocument.add(f);
+                processing = IN_DOCUMENT_PROCESS;
+            } else {
+                handleError("</lucene:" + LUCENE_FIELD_ELEMENT
+                        + "> was expected!");
+            }
+            break;
+
+        default:
+            // a lucene element is closed although none is open
+            handleError("unexpected closing element " + localName + " !");
+        }
+    }
+
+    /**
+     * Collects character data as the value of the current field; outside of
+     * a field the characters are forwarded unchanged.
+     */
+    public void characters(char[] ch, int start, int length)
+            throws SAXException {
+        if (processing != IN_FIELD_PROCESS) {
+            super.characters(ch, start, length);
+            return;
+        }
+        fieldvalue.append(ch, start, length);
+    }
+
+    /**
+     * Configures the indexer from the attributes of an action element
+     * (lucene:index or lucene:delete).
+     * <p>
+     * The index is selected by the indexid attribute and its indexer is
+     * fetched. An analyzer attribute selects a custom analyzer; without it
+     * the default analyzer id of the index is added to the attributes that
+     * are written to the output. The clear attribute empties the index and
+     * the mergefactor attribute sets the merge factor of the indexer.
+     * </p>
+     * 
+     * @param atts
+     *            the attributes of the action element; a copy of them is
+     *            kept for the element written to the output
+     * @throws SAXException
+     *             if the index or the analyzer cannot be obtained, the index
+     *             cannot be cleared or the merge factor is not a number
+     */
+    private void initIndexer(Attributes atts) throws SAXException {
+
+        String id = atts.getValue(LUCENE_INDEXING_INDEXID_ATTRIBUTE);
+        // NOTE(review): indexid is looked up by its plain name, the three
+        // optional attributes in the Lucene namespace. Un-prefixed
+        // clear/analyzer/mergefactor attributes are therefore presumably not
+        // found here - confirm that this is intended.
+        String analyzerid = atts.getValue(LUCENE_URI,
+                LUCENE_INDEXING_ANALYZER_ATTRIBUTE);
+        String mergeF = atts.getValue(LUCENE_URI,
+                LUCENE_INDEXING_MERGE_FACTOR_ATTRIBUTE);
+        String clear = atts.getValue(LUCENE_URI,
+                LUCENE_INDEXING_CREATE_ATTRIBUTE);
+        attrs = new AttributesImpl(atts);
+
+        // set the indexer
+        IndexManager indexM = null;
+        try {
+            indexM = (IndexManager) manager.lookup(IndexManager.ROLE);
+            index = indexM.getIndex(id);
+            if (index == null) {
+                handleError("index id: " + id
+                        + " no found in the index definition");
+            }
+            indexer = index.getIndexer();
+        } catch (ServiceException ex1) {
+            handleError("service Exception", ex1);
+        } catch (IndexException ex3) {
+            handleError(" get Indexer error for index " + id, ex3);
+        } finally {
+            // hand the manager back on every path, including failures
+            if (indexM != null) {
+                manager.release(indexM);
+            }
+        }
+
+        // set a custom analyzer (default: the analyzer of the index)
+        if (analyzerid != null) {
+            AnalyzerManager analyzerM = null;
+            try {
+                // the analyzer manager has to be looked up under its own
+                // role, not under the role of the index manager
+                analyzerM = (AnalyzerManager) manager
+                        .lookup(AnalyzerManager.ROLE);
+                Analyzer analyzer = analyzerM.getAnalyzer(analyzerid);
+                indexer.setAnalyzer(analyzer);
+            } catch (ServiceException ex1) {
+                handleError("service Exception", ex1);
+            } catch (ConfigurationException ex2) {
+                this.handleError("set analyzer error for index" + id, ex2);
+            } finally {
+                if (analyzerM != null) {
+                    manager.release(analyzerM);
+                }
+            }
+        } else {
+            attrs.addAttribute(LUCENE_URI, LUCENE_INDEXING_ANALYZER_ATTRIBUTE,
+                    LUCENE_INDEXING_ANALYZER_ATTRIBUTE, "CDATA", index
+                            .getDefaultAnalyzerID());
+        }
+
+        // set clear mode
+        boolean new_index = "true".equalsIgnoreCase(clear);
+        if (new_index) {
+            try {
+                indexer.clearIndex();
+            } catch (IndexException ex3) {
+                handleError(" clear index error ", ex3);
+            }
+        }
+
+        // set the mergeFactor
+        if (mergeF != null) {
+            int factor = 0;
+            try {
+                factor = Integer.parseInt(mergeF);
+            } catch (NumberFormatException ex) {
+                // go through handleError so that the indexer is released
+                handleError(LUCENE_INDEXING_MERGE_FACTOR_ATTRIBUTE
+                        + " attribute is not a number: " + mergeF, ex);
+            }
+            indexer.setMergeFactor(factor);
+        }
+
+        if (this.getLogger().isDebugEnabled()) {
+            this.getLogger().debug(
+                    "index " + id + " clear: " + new_index + " analyzerid: "
+                            + analyzerid + "mergefactor: " + mergeF);
+        }
+    }
+
+    /**
+     * Reports an error that has a message but no cause.
+     * 
+     * @param msg
+     *            the error message
+     * @throws SAXException
+     *             always
+     */
+    void handleError(String msg) throws SAXException {
+        handleError(msg, null);
+    }
+
+    void handleError(Exception ex) throws SAXException {
+        this.handleError("", ex);
+    }
+
+    /**
+     * Closes the indexer and then reports the error as a SAXException.
+     * 
+     * @param msg
+     *            the error message
+     * @param ex
+     *            the cause, or null if there is none
+     * @throws SAXException
+     *             always; it wraps the cause if one is given
+     */
+    void handleError(String msg, Exception ex) throws SAXException {
+        closeIndexer();
+        if (ex != null) {
+            throw new SAXException(msg, ex);
+        }
+        throw new SAXException(msg);
+    }
+
+    /**
+     * Hands the current indexer back to its index.
+     * <p>
+     * The reference to the indexer is dropped first, so that calling this
+     * method again (for example from an error raised after the action
+     * element was closed) does not release the same indexer twice. Nothing
+     * is released when there is no index or no indexer.
+     * </p>
+     * 
+     * @throws SAXException
+     */
+    void closeIndexer() throws SAXException {
+        Indexer held = indexer;
+        indexer = null;
+        if (index != null && held != null) {
+            index.releaseIndexer(held);
+        }
+    }
+
+}

Added: lenya/trunk/src/java/org/apache/cocoon/transformation/LuceneIndexTransformerOptimized.java
URL: http://svn.apache.org/viewcvs/lenya/trunk/src/java/org/apache/cocoon/transformation/LuceneIndexTransformerOptimized.java?rev=345031&view=auto
==============================================================================
--- lenya/trunk/src/java/org/apache/cocoon/transformation/LuceneIndexTransformerOptimized.java (added)
+++ lenya/trunk/src/java/org/apache/cocoon/transformation/LuceneIndexTransformerOptimized.java Wed Nov 16 07:14:03 2005
@@ -0,0 +1,546 @@
+/*
+ * Copyright 1999-2004 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.cocoon.transformation;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.Map;
+import java.util.Stack;
+
+import org.apache.avalon.framework.configuration.Configurable;
+import org.apache.avalon.framework.configuration.Configuration;
+import org.apache.avalon.framework.configuration.ConfigurationException;
+import org.apache.avalon.framework.context.Context;
+import org.apache.avalon.framework.context.ContextException;
+import org.apache.avalon.framework.context.Contextualizable;
+import org.apache.avalon.framework.parameters.Parameters;
+import org.apache.avalon.framework.service.ServiceException;
+import org.apache.avalon.framework.service.ServiceManager;
+import org.apache.avalon.framework.service.Serviceable;
+import org.apache.cocoon.Constants;
+import org.apache.cocoon.ProcessingException;
+import org.apache.cocoon.caching.CacheableProcessingComponent;
+import org.apache.cocoon.components.search.IndexException;
+import org.apache.cocoon.components.search.LuceneCocoonHelper;
+import org.apache.cocoon.components.search.LuceneXMLIndexer;
+import org.apache.cocoon.components.search.components.Indexer;
+import org.apache.cocoon.environment.SourceResolver;
+import org.apache.commons.lang.BooleanUtils;
+import org.apache.excalibur.source.SourceValidity;
+import org.apache.excalibur.source.impl.validity.NOPValidity;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.store.Directory;
+import org.xml.sax.Attributes;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.AttributesImpl;
+
+/**
+ * A lucene index creation transformer.
+ * <p>
+ * See <a
+ * href="http://wiki.cocoondev.org/Wiki.jsp?page=LuceneIndexTransformer">LuceneIndexTransformer
+ * </a> documentation on the Cocoon Wiki.
+ * </p>
+ * <p>
+ * TODO: Write more documentation.
+ * </p>
+ * 
+ * @author <a href="mailto:vgritsenko@apache.org">Vadim Gritsenko </a>
+ * @author <a href="mailto:conal@nzetc.org">Conal Tuohy </a>
+ * @author Nicolas Maisonneuve
+ */
+public class LuceneIndexTransformerOptimized extends AbstractTransformer implements
+        CacheableProcessingComponent, Configurable, Contextualizable,
+        Serviceable {
+
+    // Analyzer class name: child element read in configure(), sitemap
+    // parameter read in setup(), and the value used when neither is given
+    public static final String ANALYZER_CLASSNAME_CONFIG = "analyzer-classname";
+
+    public static final String ANALYZER_CLASSNAME_PARAMETER = "analyzer-classname";
+
+    public static final String ANALYZER_CLASSNAME_DEFAULT = "org.apache.lucene.analysis.standard.StandardAnalyzer";
+
+    // Index directory: child element read in configure(), sitemap parameter
+    // read in setup(), and the value used when neither is given
+    public static final String DIRECTORY_CONFIG = "directory";
+
+    public static final String DIRECTORY_PARAMETER = "directory";
+
+    public static final String DIRECTORY_DEFAULT = "index";
+
+    // Merge factor: child element read in configure(), sitemap parameter
+    // read in setup(), and the value used when neither is given
+    public static final String MERGE_FACTOR_CONFIG = "merge-factor";
+
+    public static final String MERGE_FACTOR_PARAMETER = "merge-factor";
+
+    public static final int MERGE_FACTOR_DEFAULT = 20;
+
+    // Namespace of the elements this transformer reacts to
+    public static final String LUCENE_URI = "http://apache.org/cocoon/lucene/1.0";
+
+    // The lucene:index element and the attributes read from it in
+    // startElement()
+    public static final String LUCENE_QUERY_ELEMENT = "index";
+
+    public static final String LUCENE_QUERY_ANALYZER_ATTRIBUTE = "analyzer";
+
+    public static final String LUCENE_QUERY_DIRECTORY_ATTRIBUTE = "directory";
+
+    public static final String LUCENE_QUERY_CREATE_ATTRIBUTE = "create";
+
+    public static final String LUCENE_QUERY_MERGE_FACTOR_ATTRIBUTE = "merge-factor";
+
+    // Names related to the lucene:document element
+    public static final String LUCENE_DOCUMENT_ELEMENT = "document";
+
+    public static final String LUCENE_DOCUMENT_URL_ATTRIBUTE = "url";
+
+    public static final String LUCENE_ELEMENT_ATTR_TO_TEXT_ATTRIBUTE = "text-attr";
+
+    public static final String LUCENE_ELEMENT_ATTR_STORE_VALUE = "store";
+
+    public static final String LUCENE_ELAPSED_TIME_ATTRIBUTE = "elapsed-time";
+
+    // SAX attribute type name
+    public static final String CDATA = "CDATA";
+
+    // The 3 states of the state machine
+
+    // initial or "ground" state
+    private static final int STATE_GROUND = 0;
+
+    // processing a lucene:index (Query) element
+    private static final int STATE_QUERY = 1;
+
+    // processing a lucene:document element
+    private static final int STATE_DOCUMENT = 2;
+
+    // Initialization time variables
+
+    // Work directory, taken from the context in contextualize()
+    protected File workDir = null;
+
+    // service manager
+    private ServiceManager manager;
+
+    // Current indexer; released through the service manager in recycle()
+    private Indexer indexer;
+
+    // Declaration time parameters values (specified in sitemap component
+    // config)
+    private IndexerConfiguration configureConfiguration;
+
+    // Invocation time parameters values (specified in sitemap transform
+    // parameters)
+    private IndexerConfiguration setupConfiguration;
+
+    // Parameters specified in the input document
+    private IndexerConfiguration queryConfiguration;
+
+    // Runtime variables
+
+    // Current state: one of the STATE_* constants
+    private int processing;
+
+    // Value of the create attribute of the current lucene:index element
+    private boolean createIndex = false;
+
+    // Per-document working state; all of it is reset in recycle()
+    private StringBuffer bodyText;
+
+    private Document bodyDocument;
+
+    private String bodyDocumentURL;
+
+    private Stack elementStack = new Stack();
+
+    /**
+     * Storage for the document element's attributes until the document has been
+     * indexed, so that they can be copied to the output along with a boolean
+     * <code>indexed</code> attribute.
+     */
+    private AttributesImpl documentAttributes;
+
+    // presumably the start time behind the elapsed-time attribute - TODO
+    // confirm against the code that sets it
+    private long documentStartTime;
+
+    private static String uid(String url) {
+        return url.replace('/', '\u0000'); // + "\u0000" +
+                                            // DateField.timeToString(urlConnection.getLastModified());
+    }
+
+    /**
+     * Stores the service manager for later use.
+     * 
+     * @param manager
+     *            the service manager
+     */
+    public void service(ServiceManager manager) throws ServiceException {
+        this.manager = manager;
+    }
+
+    /**
+     * Configure the transformer. The values read here are the general
+     * defaults; they may be over-ridden by parameters specified in the
+     * sitemap pipeline, or by attributes of the query element(s) in the XML
+     * input document.
+     * 
+     * @param conf
+     *            the component configuration; missing children fall back to
+     *            the *_DEFAULT constants
+     */
+    public void configure(Configuration conf) throws ConfigurationException {
+        final String analyzerClassname = conf.getChild(
+                ANALYZER_CLASSNAME_CONFIG).getValue(ANALYZER_CLASSNAME_DEFAULT);
+        final String directory = conf.getChild(DIRECTORY_CONFIG).getValue(
+                DIRECTORY_DEFAULT);
+        final int mergeFactor = conf.getChild(MERGE_FACTOR_CONFIG)
+                .getValueAsInteger(MERGE_FACTOR_DEFAULT);
+
+        this.configureConfiguration = new IndexerConfiguration(
+                analyzerClassname, directory, mergeFactor);
+    }
+
+    /**
+     * Setup the transformer. Called when the pipeline is assembled. The
+     * parameters are those specified as child elements of the
+     * <code>&lt;map:transform&gt;</code> element in the sitemap. They are
+     * optional: a parameter that is not given falls back to the value from
+     * the component configuration. Any value set here may in turn be
+     * over-ridden by attributes of the lucene:index element in the input
+     * document.
+     */
+    public void setup(SourceResolver resolver, Map objectModel, String src,
+            Parameters parameters) throws ProcessingException, SAXException,
+            IOException {
+        final String analyzerClassname = parameters.getParameter(
+                ANALYZER_CLASSNAME_PARAMETER,
+                configureConfiguration.analyzerClassname);
+        final String directory = parameters.getParameter(DIRECTORY_PARAMETER,
+                configureConfiguration.indexDirectory);
+        final int mergeFactor = parameters.getParameterAsInteger(
+                MERGE_FACTOR_PARAMETER, configureConfiguration.mergeFactor);
+
+        setupConfiguration = new IndexerConfiguration(analyzerClassname,
+                directory, mergeFactor);
+    }
+
+    /**
+     * Takes the work directory from the context.
+     * 
+     * @param context
+     *            the context; its Constants.CONTEXT_WORK_DIR entry is stored
+     *            as the work directory
+     */
+    public void contextualize(Context context) throws ContextException {
+        final Object dir = context.get(Constants.CONTEXT_WORK_DIR);
+        this.workDir = (File) dir;
+    }
+
+    /**
+     * Resets this transformer so that the instance can be reused: the state
+     * machine goes back to the ground state, a held indexer is released, the
+     * per-document state is dropped and the superclass is recycled.
+     */
+    public void recycle() {
+        processing = STATE_GROUND;
+
+        // give the indexer back, if one is still held
+        if (indexer != null) {
+            manager.release(indexer);
+            indexer = null;
+        }
+
+        // forget everything that belongs to the last document
+        bodyText = null;
+        bodyDocument = null;
+        bodyDocumentURL = null;
+        elementStack.clear();
+
+        super.recycle();
+    }
+
+    /**
+     * Generate the unique key. This key must be unique inside the space of this
+     * component. This implementation always returns the constant "1".
+     * 
+     * @return The generated key
+     */
+    public Serializable getKey() {
+        return "1";
+    }
+
+    /**
+     * Generate the validity object. This implementation always returns the
+     * shared NOPValidity instance and never <code>null</code>.
+     * <p>
+     * NOTE(review): together with the constant key this presumably lets a
+     * caching pipeline reuse a cached result without running this
+     * transformer again - confirm that this is intended.
+     * </p>
+     * 
+     * @return The generated validity object
+     */
+    public SourceValidity getValidity() {
+        return NOPValidity.SHARED_INSTANCE;
+    }
+
+    /**
+     * Forwards the start of the document to the superclass unchanged.
+     */
+    public void startDocument() throws SAXException {
+        super.startDocument();
+    }
+
+    /**
+     * Forwards the end of the document to the superclass unchanged.
+     */
+    public void endDocument() throws SAXException {
+        super.endDocument();
+    }
+
+    /**
+     * Begin the scope of a prefix-URI Namespace mapping. The event is only
+     * forwarded while the state machine is in the ground state.
+     * 
+     * @param prefix
+     *            The Namespace prefix being declared.
+     * @param uri
+     *            The Namespace URI the prefix is mapped to.
+     */
+    public void startPrefixMapping(String prefix, String uri)
+            throws SAXException {
+        if (processing != STATE_GROUND) {
+            // not in the ground state: the mapping is not forwarded
+            return;
+        }
+        super.startPrefixMapping(prefix, uri);
+    }
+
+    /**
+     * End the scope of a prefix-URI mapping. As with
+     * <code>startPrefixMapping</code>, the event is forwarded only while the
+     * transformer is in <code>STATE_GROUND</code>.
+     * 
+     * @param prefix
+     *            The prefix that was being mapped.
+     */
+    public void endPrefixMapping(String prefix) throws SAXException {
+        if (processing != STATE_GROUND) {
+            // inside the index element: nothing is propagated
+            return;
+        }
+        super.endPrefixMapping(prefix);
+    }
+
+    /**
+     * Handle the start of an element according to the current state.
+     * <ul>
+     * <li><code>STATE_GROUND</code>: the lucene index element switches to
+     * <code>STATE_QUERY</code>. Its create, analyzer, directory and merge
+     * factor attributes are read, missing ones fall back to the values of
+     * <code>setupConfiguration</code>, and the element is forwarded. Every
+     * other element is forwarded untouched.</li>
+     * <li><code>STATE_QUERY</code>: only a lucene document element with a
+     * url attribute is accepted. The per-document buffers are initialised
+     * and the state becomes <code>STATE_DOCUMENT</code>; the element itself
+     * is forwarded later, when it ends.</li>
+     * <li><code>STATE_DOCUMENT</code>: the element is pushed on the element
+     * stack together with a copy of its attributes and is not forwarded.</li>
+     * </ul>
+     * 
+     * @param namespaceURI
+     *            namespace URI of the element
+     * @param localName
+     *            local name of the element
+     * @param qName
+     *            qualified name of the element
+     * @param atts
+     *            attributes of the element
+     * @throws SAXException
+     *             if the merge factor attribute is not an integer, if a
+     *             document element has no url attribute, or if the index
+     *             element contains anything but document elements
+     */
+    public void startElement(String namespaceURI, String localName,
+            String qName, Attributes atts) throws SAXException {
+
+        if (processing == STATE_GROUND) {
+            if (LUCENE_URI.equals(namespaceURI)
+                    && LUCENE_QUERY_ELEMENT.equals(localName)) {
+                String sCreate = atts.getValue(LUCENE_QUERY_CREATE_ATTRIBUTE);
+                createIndex = BooleanUtils.toBoolean(sCreate);
+
+                String analyzerClassname = atts
+                        .getValue(LUCENE_QUERY_ANALYZER_ATTRIBUTE);
+                String indexDirectory = atts
+                        .getValue(LUCENE_QUERY_DIRECTORY_ATTRIBUTE);
+                String mergeFactor = atts
+                        .getValue(LUCENE_QUERY_MERGE_FACTOR_ATTRIBUTE);
+
+                // A malformed merge factor is an input error: report it as a
+                // SAXException instead of letting an unchecked
+                // NumberFormatException escape from the SAX callback.
+                int mergeFactorValue;
+                if (mergeFactor == null) {
+                    mergeFactorValue = setupConfiguration.mergeFactor;
+                } else {
+                    try {
+                        mergeFactorValue = Integer.parseInt(mergeFactor);
+                    } catch (NumberFormatException e) {
+                        throw new SAXException("<lucene:index> attribute @"
+                                + LUCENE_QUERY_MERGE_FACTOR_ATTRIBUTE
+                                + " must be an integer but was '"
+                                + mergeFactor + "'", e);
+                    }
+                }
+
+                queryConfiguration = new IndexerConfiguration(
+                        analyzerClassname != null ? analyzerClassname
+                                : setupConfiguration.analyzerClassname,
+                        indexDirectory != null ? indexDirectory
+                                : setupConfiguration.indexDirectory,
+                        mergeFactorValue);
+
+                // propagate the lucene:index to the next stage in the pipeline
+                super.startElement(namespaceURI, localName, qName, atts);
+                processing = STATE_QUERY;
+            } else {
+                super.startElement(namespaceURI, localName, qName, atts);
+            }
+        } else if (processing == STATE_QUERY) {
+            // processing a lucene:index - expecting a lucene:document
+            if (LUCENE_URI.equals(namespaceURI)
+                    && LUCENE_DOCUMENT_ELEMENT.equals(localName)) {
+                this.bodyDocumentURL = atts
+                        .getValue(LUCENE_DOCUMENT_URL_ATTRIBUTE);
+                if (this.bodyDocumentURL == null) {
+                    throw new SAXException(
+                            "<lucene:document> must have @url attribute");
+                }
+
+                // Remember the time the document indexing began
+                this.documentStartTime = System.currentTimeMillis();
+                // remember these attributes so they can be passed on to the
+                // next stage in the pipeline,
+                // when this document element is ended.
+                this.documentAttributes = new AttributesImpl(atts);
+                this.bodyText = new StringBuffer();
+                this.bodyDocument = new Document();
+                this.elementStack.clear();
+                processing = STATE_DOCUMENT;
+            } else {
+                throw new SAXException(
+                        "<lucene:index> element can contain only <lucene:document> elements!");
+            }
+        } else if (processing == STATE_DOCUMENT) {
+            // Copy the attributes: the parser may reuse the Attributes object
+            // after this callback returns.
+            elementStack.push(new IndexHelperField(localName,
+                    new AttributesImpl(atts)));
+        }
+    }
+
+    /**
+     * Handle the end of an element according to the current state.
+     * <ul>
+     * <li><code>STATE_QUERY</code>: only the end of the lucene index element
+     * is legal; it is forwarded and the state returns to
+     * <code>STATE_GROUND</code>.</li>
+     * <li><code>STATE_DOCUMENT</code>, end of the lucene document element:
+     * the body, url and uid fields are added to the document, the document
+     * is handed to the indexer, and the document element is forwarded with
+     * an additional elapsed-time attribute. The state returns to
+     * <code>STATE_QUERY</code>.</li>
+     * <li><code>STATE_DOCUMENT</code>, any other element: the element is
+     * popped from the stack; each of its non-lucene attributes becomes an
+     * unstored field named <code>element@attribute</code>, and its text, if
+     * any, becomes a field named after the element.</li>
+     * <li>Any other state: the event is forwarded.</li>
+     * </ul>
+     * 
+     * @param namespaceURI
+     *            namespace URI of the element
+     * @param localName
+     *            local name of the element
+     * @param qName
+     *            qualified name of the element
+     * @throws SAXException
+     *             if something other than the index element ends while in
+     *             <code>STATE_QUERY</code>, or if indexing the document fails
+     */
+    public void endElement(String namespaceURI, String localName, String qName)
+            throws SAXException {
+
+        if (processing != STATE_QUERY && processing != STATE_DOCUMENT) {
+            // All other tags
+            super.endElement(namespaceURI, localName, qName);
+            return;
+        }
+
+        final boolean luceneNamespace = LUCENE_URI.equals(namespaceURI);
+
+        if (processing == STATE_QUERY) {
+            if (!(luceneNamespace && LUCENE_QUERY_ELEMENT.equals(localName))) {
+                throw new SAXException("</lucene:index> was expected!");
+            }
+            // propagate the query element to the next stage in the pipeline
+            super.endElement(namespaceURI, localName, qName);
+            this.processing = STATE_GROUND;
+            return;
+        }
+
+        // From here on: STATE_DOCUMENT
+        if (luceneNamespace && LUCENE_DOCUMENT_ELEMENT.equals(localName)) {
+            // The document is complete: add the document-wide fields.
+            final String allText = this.bodyText.toString();
+            this.bodyDocument.add(Field.UnStored(LuceneXMLIndexer.BODY_FIELD,
+                    allText));
+            this.bodyText = null;
+
+            this.bodyDocument.add(Field.UnIndexed(LuceneXMLIndexer.URL_FIELD,
+                    this.bodyDocumentURL));
+            // store: false, index: true, tokenize: false
+            this.bodyDocument.add(new Field(LuceneXMLIndexer.UID_FIELD,
+                    uid(this.bodyDocumentURL), false, true, false));
+
+            try {
+                reindexDocument();
+            } catch (IndexException e) {
+                throw new SAXException(e);
+            }
+            this.bodyDocumentURL = null;
+
+            // Forward the lucene:document element, annotated with the time
+            // that passed since its start tag was seen.
+            final long elapsedMillis = System.currentTimeMillis()
+                    - this.documentStartTime;
+            this.documentAttributes.addAttribute("",
+                    LUCENE_ELAPSED_TIME_ATTRIBUTE,
+                    LUCENE_ELAPSED_TIME_ATTRIBUTE, CDATA,
+                    String.valueOf(elapsedMillis));
+            super.startElement(namespaceURI, localName, qName,
+                    this.documentAttributes);
+            super.endElement(namespaceURI, localName, qName);
+            this.processing = STATE_QUERY;
+            return;
+        }
+
+        // An ordinary element inside the document: turn it into fields.
+        final IndexHelperField field = (IndexHelperField) elementStack.pop();
+        final StringBuffer fieldText = field.getText();
+        final Attributes fieldAtts = field.getAttributes();
+
+        // Only the presence of the marker attribute matters, not its value.
+        final boolean copyAttributesToText = fieldAtts.getIndex(LUCENE_URI,
+                LUCENE_ELEMENT_ATTR_TO_TEXT_ATTRIBUTE) != -1;
+
+        final int attributeCount = fieldAtts.getLength();
+        for (int idx = 0; idx < attributeCount; idx++) {
+            // Lucene attributes steer the indexing and are not indexed
+            if (!LUCENE_URI.equals(fieldAtts.getURI(idx))) {
+                final String attributeName = fieldAtts.getLocalName(idx);
+                final String attributeValue = fieldAtts.getValue(idx);
+                bodyDocument.add(Field.UnStored(localName + "@"
+                        + attributeName, attributeValue));
+                if (copyAttributesToText) {
+                    fieldText.append(attributeValue);
+                    fieldText.append(' ');
+                    bodyText.append(attributeValue);
+                    bodyText.append(' ');
+                }
+            }
+        }
+
+        final boolean storeValue = fieldAtts.getIndex(LUCENE_URI,
+                LUCENE_ELEMENT_ATTR_STORE_VALUE) != -1;
+        if (fieldText == null || fieldText.length() == 0) {
+            // no text collected for this element: no field of its own
+            return;
+        }
+        final String fieldValue = fieldText.toString();
+        bodyDocument.add(storeValue ? Field.Text(localName, fieldValue)
+                : Field.UnStored(localName, fieldValue));
+    }
+
+    /**
+     * Receive character data.
+     * <p>
+     * Inside a lucene document element the text is appended to the element
+     * on top of the element stack and, followed by a blank, to the body text
+     * of the document; nothing is forwarded. In <code>STATE_GROUND</code> the
+     * event is forwarded unchanged. In every other situation the text is
+     * dropped.
+     * <p>
+     * Every non-empty chunk is collected. A parser is free to deliver text
+     * in chunks of any size, so one-character chunks must not be skipped.
+     * 
+     * @param ch
+     *            buffer holding the characters
+     * @param start
+     *            offset of the first character in <code>ch</code>
+     * @param length
+     *            number of characters to read from <code>ch</code>
+     */
+    public void characters(char[] ch, int start, int length)
+            throws SAXException {
+
+        if (processing == STATE_DOCUMENT && ch.length > 0 && start >= 0
+                && length > 0 && elementStack.size() > 0) {
+            // Append straight from the buffer, no temporary String needed
+            ((IndexHelperField) elementStack.peek()).append(ch, start, length);
+            bodyText.append(ch, start, length);
+            bodyText.append(' ');
+        } else if (processing == STATE_GROUND) {
+            super.characters(ch, start, length);
+        }
+    }
+
+    /**
+     * Look up the indexer component and configure it from
+     * <code>queryConfiguration</code>.
+     * <p>
+     * A relative index directory is resolved against the work directory. If
+     * no index exists at that location yet, <code>createIndex</code> is
+     * forced to <code>true</code>. The indexer then receives the directory,
+     * the analyzer and the merge factor, and <code>clearIndex()</code> is
+     * called on it when the index is to be created.
+     * 
+     * @throws IndexException
+     *             if the indexer cannot be looked up or the index directory
+     *             cannot be opened
+     */
+    private void openWriter() throws IndexException {
+        // Report through the component logger instead of writing to stdout
+        if (getLogger().isDebugEnabled()) {
+            getLogger().debug(
+                    "use luceneIndexTransformer with indexer component");
+        }
+        // lookup the indexer
+        try {
+            indexer = (Indexer) this.manager.lookup(Indexer.ROLE + "/default");
+        } catch (ServiceException e) {
+            throw new IndexException(e);
+        }
+
+        File indexDirectory = new File(queryConfiguration.indexDirectory);
+        if (!indexDirectory.isAbsolute()) {
+            indexDirectory = new File(workDir,
+                    queryConfiguration.indexDirectory);
+        }
+        // If the index directory doesn't exist, then always create it.
+        boolean indexExists = IndexReader.indexExists(indexDirectory);
+        if (!indexExists) {
+            createIndex = true;
+        }
+        // Get the index directory, creating it if necessary
+        try {
+            Directory directory = LuceneCocoonHelper.getDirectory(
+                    indexDirectory, createIndex);
+            indexer.setIndex(directory);
+        } catch (IOException e) {
+            throw new IndexException("set directory " + indexDirectory
+                    + " error", e);
+        }
+        // Get the analyzer
+        Analyzer analyzer = LuceneCocoonHelper
+                .getAnalyzer(queryConfiguration.analyzerClassname);
+        indexer.setAnalyzer(analyzer);
+
+        this.indexer.setMergeFactor(queryConfiguration.mergeFactor);
+        if (this.createIndex) {
+            this.indexer.clearIndex();
+        }
+    }
+
+    private void reindexDocument() throws IndexException {
+        // The index is being created, so there's no need to delete the doc from
+        // an existing index.
+        // This means we can keep a single IndexWriter open throughout the
+        // process.
+        if (this.indexer == null) {
+            openWriter();
+        }
+        this.indexer.index(this.bodyDocument);
+        this.bodyDocument = null;
+    }
+
+    /**
+     * Collects what is known about one element while it is open: its local
+     * name, its attributes and the text appended to it so far.
+     */
+    class IndexHelperField {
+        /** Local name of the element. */
+        String localName;
+
+        /** Text collected for the element; starts out empty. */
+        StringBuffer text = new StringBuffer();
+
+        /** Attributes of the element as passed to the constructor. */
+        Attributes attributes;
+
+        /**
+         * @param localName
+         *            local name of the element
+         * @param atts
+         *            attributes of the element; the reference is kept as is
+         */
+        IndexHelperField(String localName, Attributes atts) {
+            this.attributes = atts;
+            this.localName = localName;
+        }
+
+        /**
+         * @return the attributes given at construction time
+         */
+        public Attributes getAttributes() {
+            return this.attributes;
+        }
+
+        /**
+         * @return the live text buffer, not a copy
+         */
+        public StringBuffer getText() {
+            return this.text;
+        }
+
+        /**
+         * Append a string to the collected text.
+         * 
+         * @param text
+         *            the text to append
+         */
+        public void append(String text) {
+            final StringBuffer buffer = this.text;
+            buffer.append(text);
+        }
+
+        /**
+         * Append a slice of a character array to the collected text.
+         * 
+         * @param str
+         *            buffer holding the characters
+         * @param offset
+         *            index of the first character to append
+         * @param length
+         *            number of characters to append
+         */
+        public void append(char[] str, int offset, int length) {
+            final StringBuffer buffer = this.text;
+            buffer.append(str, offset, length);
+        }
+    }
+
+    /**
+     * Plain holder for the three indexing settings: analyzer class name,
+     * index directory and merge factor.
+     */
+    class IndexerConfiguration {
+        /** Class name of the analyzer to use. */
+        String analyzerClassname;
+
+        /** Index directory name, absolute or relative. */
+        String indexDirectory;
+
+        /** Merge factor handed to the indexer. */
+        int mergeFactor;
+
+        /**
+         * @param analyzerClassname
+         *            class name of the analyzer
+         * @param indexDirectory
+         *            name of the index directory
+         * @param mergeFactor
+         *            merge factor for the indexer
+         */
+        public IndexerConfiguration(String analyzerClassname,
+                String indexDirectory, int mergeFactor) {
+            this.mergeFactor = mergeFactor;
+            this.indexDirectory = indexDirectory;
+            this.analyzerClassname = analyzerClassname;
+        }
+    }
+
+}



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@lenya.apache.org
For additional commands, e-mail: commits-help@lenya.apache.org