You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@cocoon.apache.org by ni...@apache.org on 2002/12/19 00:44:51 UTC

cvs commit: xml-cocoon2/src/scratchpad/src/org/apache/cocoon/transformation LuceneIndexTransformer.java

nicolaken    2002/12/18 15:44:51

  Added:       src/blocks/lucene/conf lucene.samplesxpipe lucene.xconf
                        lucene.xmap lucene.xroles
               src/blocks/lucene/java/org/apache/cocoon/components/search
                        IndexHelperField.java LuceneCocoonHelper.java
                        LuceneCocoonIndexer.java LuceneCocoonPager.java
                        LuceneCocoonSearcher.java
                        LuceneIndexContentHandler.java
                        LuceneXMLIndexer.java package.html
                        SimpleLuceneCocoonIndexerImpl.java
                        SimpleLuceneCocoonSearcherImpl.java
                        SimpleLuceneXMLIndexerImpl.java
               src/blocks/lucene/java/org/apache/cocoon/generation
                        SearchGenerator.java
               src/blocks/lucene/java/org/apache/cocoon/transformation
                        LuceneIndexTransformer.java
  Removed:     src/java/org/apache/cocoon/components/search
                        IndexHelperField.java lucene.samplesxpipe
                        lucene.xconf lucene.xroles LuceneCocoonHelper.java
                        LuceneCocoonIndexer.java LuceneCocoonPager.java
                        LuceneCocoonSearcher.java
                        LuceneIndexContentHandler.java
                        LuceneXMLIndexer.java package.html
                        SimpleLuceneCocoonIndexerImpl.java
                        SimpleLuceneCocoonSearcherImpl.java
                        SimpleLuceneXMLIndexerImpl.java
               src/java/org/apache/cocoon/generation lucene.xmap
                        SearchGenerator.java
               src/scratchpad/src/org/apache/cocoon/transformation
                        LuceneIndexTransformer.java
  Log:
  lucene block.
  
  Revision  Changes    Path
  1.1                  xml-cocoon2/src/blocks/lucene/conf/lucene.samplesxpipe
  
  Index: lucene.samplesxpipe
  ===================================================================
  <?xml version="1.0"?>
  
  <!--
    Patch fragment for the samples sitemap. The xpath attribute addresses the
    pipeline whose id is 'optional'; the unless attribute tests for an
    existing match on 'search/**'.
    NOTE(review): presumably the fragment is only inserted when the unless
    test finds nothing; confirm against the build's patching task.
  -->
  <samplesxpipe xpath="/sitemap/pipelines/pipeline[@id='optional']"
         unless="match[@pattern='search/**']">
  
      <!-- Mount search pages sitemap, for using indexing & searching -->
      <map:match pattern="search/**">
        <map:mount check-reload="yes" src="search/" uri-prefix="search"/>
      </map:match>
  </samplesxpipe>
  
  
  
  1.1                  xml-cocoon2/src/blocks/lucene/conf/lucene.xconf
  
  Index: lucene.xconf
  ===================================================================
  <?xml version="1.0"?>
  
  <xconf xpath="/cocoon" unless="cocoon-indexer">
  
    <!-- Search:
      These are the components that handle the search. 
      
      Cocoon indexer writes into an index.
      Cocoon searcher reads from an index, returning matched hits.
      Cocoon crawler crawls all links starting from a given base URI.
      Lucene xml indexer builds a lucene document from XML content.
     -->
    <cocoon-indexer     logger="core.search.indexer"/>
    <cocoon-searcher    logger="core.search.searcher"/>  
    <cocoon-crawler     logger="core.search.crawler"/>
    <lucene-xml-indexer logger="core.search.lucene"/>
  </xconf>
  
  
  
  1.1                  xml-cocoon2/src/blocks/lucene/conf/lucene.xmap
  
  Index: lucene.xmap
  ===================================================================
  <?xml version="1.0"?>
  
  <!--
    Patch fragment addressing /sitemap/components/generators. It declares a
    generator named "search" implemented by
    org.apache.cocoon.generation.SearchGenerator. The unless attribute tests
    for an already declared generator of that name.
    NOTE(review): presumably the fragment is skipped when the unless test
    matches; confirm against the build's patching task.
  -->
  <xmap xpath="/sitemap/components/generators"
        unless="generator[@name='search']">
      <map:generator name="search" src="org.apache.cocoon.generation.SearchGenerator"/>
  </xmap>
  
  
  
  1.1                  xml-cocoon2/src/blocks/lucene/conf/lucene.xroles
  
  Index: lucene.xroles
  ===================================================================
  <?xml version="1.0"?>
  <!--
    Patch fragment addressing /role-list. Each role below maps a component
    role name to a shorthand element name and a default implementation class.
    The unless attribute tests for an already declared LuceneCocoonIndexer role.
    NOTE(review): presumably the whole fragment is skipped when that role is
    present; confirm against the build's patching task.
  -->
  <xroles xpath="/role-list" unless="role[@name='org.apache.cocoon.components.search.LuceneCocoonIndexer']">
  
    <!-- Lucene Components: indexer, searcher, XML indexer and crawler roles -->
    <role name="org.apache.cocoon.components.search.LuceneCocoonIndexer" shorthand="cocoon-indexer"
  default-class="org.apache.cocoon.components.search.SimpleLuceneCocoonIndexerImpl"/>
  
    <role name="org.apache.cocoon.components.search.LuceneCocoonSearcher" shorthand="cocoon-searcher"
  default-class="org.apache.cocoon.components.search.SimpleLuceneCocoonSearcherImpl"/>
  
    <role name="org.apache.cocoon.components.search.LuceneXMLIndexer" shorthand="lucene-xml-indexer"
  default-class="org.apache.cocoon.components.search.SimpleLuceneXMLIndexerImpl"/>
  
    <role name="org.apache.cocoon.components.crawler.CocoonCrawler" shorthand="cocoon-crawler"
  default-class="org.apache.cocoon.components.crawler.SimpleCocoonCrawlerImpl"/>
  </xroles>
  
  
  
  1.1                  xml-cocoon2/src/blocks/lucene/java/org/apache/cocoon/components/search/IndexHelperField.java
  
  Index: IndexHelperField.java
  ===================================================================
  /*
  
   ============================================================================
                     The Apache Software License, Version 1.1
   ============================================================================
  
   Copyright (C) 1999-2002 The Apache Software Foundation. All rights reserved.
  
   Redistribution and use in source and binary forms, with or without modifica-
   tion, are permitted provided that the following conditions are met:
  
   1. Redistributions of  source code must  retain the above copyright  notice,
      this list of conditions and the following disclaimer.
  
   2. Redistributions in binary form must reproduce the above copyright notice,
      this list of conditions and the following disclaimer in the documentation
      and/or other materials provided with the distribution.
  
   3. The end-user documentation included with the redistribution, if any, must
      include  the following  acknowledgment:  "This product includes  software
      developed  by the  Apache Software Foundation  (http://www.apache.org/)."
      Alternately, this  acknowledgment may  appear in the software itself,  if
      and wherever such third-party acknowledgments normally appear.
  
   4. The names "Apache Cocoon" and  "Apache Software Foundation" must  not  be
      used to  endorse or promote  products derived from  this software without
      prior written permission. For written permission, please contact
      apache@apache.org.
  
   5. Products  derived from this software may not  be called "Apache", nor may
      "Apache" appear  in their name,  without prior written permission  of the
      Apache Software Foundation.
  
   THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
   INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
   FITNESS  FOR A PARTICULAR  PURPOSE ARE  DISCLAIMED.  IN NO  EVENT SHALL  THE
   APACHE SOFTWARE  FOUNDATION  OR ITS CONTRIBUTORS  BE LIABLE FOR  ANY DIRECT,
   INDIRECT, INCIDENTAL, SPECIAL,  EXEMPLARY, OR CONSEQUENTIAL  DAMAGES (INCLU-
   DING, BUT NOT LIMITED TO, PROCUREMENT  OF SUBSTITUTE GOODS OR SERVICES; LOSS
   OF USE, DATA, OR  PROFITS; OR BUSINESS  INTERRUPTION)  HOWEVER CAUSED AND ON
   ANY  THEORY OF LIABILITY,  WHETHER  IN CONTRACT,  STRICT LIABILITY,  OR TORT
   (INCLUDING  NEGLIGENCE OR  OTHERWISE) ARISING IN  ANY WAY OUT OF THE  USE OF
   THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  
   This software  consists of voluntary contributions made  by many individuals
   on  behalf of the Apache Software  Foundation and was  originally created by
   Stefano Mazzocchi  <st...@apache.org>. For more  information on the Apache
   Software Foundation, please see <http://www.apache.org/>.
  
  */
  package org.apache.cocoon.components.search;
  
  import org.xml.sax.Attributes;
  
  /**
   * A helper class for generating a lucene document in a SAX ContentHandler.
   *
   * @author     <a href="mailto:berni_huber@a1.net">Bernhard Huber</a>
   * @version    CVS $Id: IndexHelperField.java,v 1.1 2002/12/18 23:44:50 nicolaken Exp $
   */
  class IndexHelperField
  {
      /** Local (unprefixed) name of the field. */
      String localFieldName;

      /** Qualified name of the field. */
      String qualifiedFieldName;

      /** Accumulates the character content appended to this field. */
      StringBuffer text = new StringBuffer();

      /** Attributes handed to the constructor; stored by reference, not copied. */
      Attributes attributes;


      /**
       * Creates a field holder with an empty text buffer.
       *
       * @param  lfn   local name of the field
       * @param  qfn   qualified name of the field
       * @param  atts  attributes associated with the field; the reference is
       *               kept as is
       */
      IndexHelperField(String lfn, String qfn, Attributes atts) {
          localFieldName = lfn;
          qualifiedFieldName = qfn;
          attributes = atts;
      }


      /**
       * Returns the local name given at construction time.
       *
       * @return    the local field name
       */
      public String getLocalFieldName() {
          return this.localFieldName;
      }


      /**
       * Returns the qualified name given at construction time.
       *
       * @return    the qualified field name
       */
      public String getQualifiedFieldName() {
          return this.qualifiedFieldName;
      }


      /**
       * Returns the attributes object given at construction time.
       *
       * @return    the stored attributes reference
       */
      public Attributes getAttributes() {
          return this.attributes;
      }


      /**
       * Returns the live text buffer of this field; changes made to the
       * returned buffer are visible through this object.
       *
       * @return    the buffer holding all text appended so far
       */
      public StringBuffer getText() {
          return this.text;
      }


      /**
       * Appends a string to the text collected for this field.
       *
       * @param  text  the characters to append
       */
      public void appendText(String text) {
          StringBuffer buffer = this.text;
          buffer.append(text);
      }


      /**
       * Appends a slice of a character array to the text collected for this
       * field.
       *
       * @param  str     array holding the characters
       * @param  offset  index of the first character to append
       * @param  length  number of characters to append
       */
      public void appendText(char[] str, int offset, int length) {
          StringBuffer buffer = this.text;
          buffer.append(str, offset, length);
      }
  }
  
  
  
  
  1.1                  xml-cocoon2/src/blocks/lucene/java/org/apache/cocoon/components/search/LuceneCocoonHelper.java
  
  Index: LuceneCocoonHelper.java
  ===================================================================
  /*
  
   ============================================================================
                     The Apache Software License, Version 1.1
   ============================================================================
  
   Copyright (C) 1999-2002 The Apache Software Foundation. All rights reserved.
  
   Redistribution and use in source and binary forms, with or without modifica-
   tion, are permitted provided that the following conditions are met:
  
   1. Redistributions of  source code must  retain the above copyright  notice,
      this list of conditions and the following disclaimer.
  
   2. Redistributions in binary form must reproduce the above copyright notice,
      this list of conditions and the following disclaimer in the documentation
      and/or other materials provided with the distribution.
  
   3. The end-user documentation included with the redistribution, if any, must
      include  the following  acknowledgment:  "This product includes  software
      developed  by the  Apache Software Foundation  (http://www.apache.org/)."
      Alternately, this  acknowledgment may  appear in the software itself,  if
      and wherever such third-party acknowledgments normally appear.
  
   4. The names "Apache Cocoon" and  "Apache Software Foundation" must  not  be
      used to  endorse or promote  products derived from  this software without
      prior written permission. For written permission, please contact
      apache@apache.org.
  
   5. Products  derived from this software may not  be called "Apache", nor may
      "Apache" appear  in their name,  without prior written permission  of the
      Apache Software Foundation.
  
   THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
   INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
   FITNESS  FOR A PARTICULAR  PURPOSE ARE  DISCLAIMED.  IN NO  EVENT SHALL  THE
   APACHE SOFTWARE  FOUNDATION  OR ITS CONTRIBUTORS  BE LIABLE FOR  ANY DIRECT,
   INDIRECT, INCIDENTAL, SPECIAL,  EXEMPLARY, OR CONSEQUENTIAL  DAMAGES (INCLU-
   DING, BUT NOT LIMITED TO, PROCUREMENT  OF SUBSTITUTE GOODS OR SERVICES; LOSS
   OF USE, DATA, OR  PROFITS; OR BUSINESS  INTERRUPTION)  HOWEVER CAUSED AND ON
   ANY  THEORY OF LIABILITY,  WHETHER  IN CONTRACT,  STRICT LIABILITY,  OR TORT
   (INCLUDING  NEGLIGENCE OR  OTHERWISE) ARISING IN  ANY WAY OUT OF THE  USE OF
   THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  
   This software  consists of voluntary contributions made  by many individuals
   on  behalf of the Apache Software  Foundation and was  originally created by
   Stefano Mazzocchi  <st...@apache.org>. For more  information on the Apache
   Software Foundation, please see <http://www.apache.org/>.
  
  */
  package org.apache.cocoon.components.search;
  
  import org.apache.lucene.analysis.Analyzer;
  import org.apache.lucene.index.IndexReader;
  import org.apache.lucene.index.IndexWriter;
  import org.apache.lucene.store.Directory;
  import org.apache.lucene.store.FSDirectory;
  
  import java.io.File;
  import java.io.IOException;
  
  /**
   * This class encapsulates some helper methods.
   *
   * @author     <a href="mailto:berni_huber@a1.net">Bernhard Huber</a>
   * @version    CVS $Id: LuceneCocoonHelper.java,v 1.1 2002/12/18 23:44:50 nicolaken Exp $
   */
  public class LuceneCocoonHelper
  {
      /**
       * Obtains a file-system backed lucene directory by delegating to
       * <code>FSDirectory.getDirectory(directory, create)</code>.
       *
       * @param  directory        file-system location of the index
       * @param  create           passed through unchanged to FSDirectory
       * @return                  the directory returned by FSDirectory
       * @exception  IOException  if FSDirectory fails to provide the directory
       */
      public static Directory getDirectory(File directory, boolean create) throws IOException {
          return FSDirectory.getDirectory(directory, create);
      }

      /**
       * Instantiates an analyzer reflectively from its class name, using the
       * class' no-argument constructor.
       *
       * @param  analyzer_class_name  fully qualified name of the analyzer class
       * @return                      a new analyzer instance, or
       *                              <code>null</code> if the class cannot be
       *                              loaded, instantiated, or is not an Analyzer
       */
      public static Analyzer getAnalyzer(String analyzer_class_name) {
          try {
              return (Analyzer) Class.forName(analyzer_class_name).newInstance();
          } catch (Exception e) {
              // Any failure is reported to the caller as a null analyzer.
              return null;
          }
      }

      /**
       * Opens an index reader on the given directory via
       * <code>IndexReader.open(directory)</code>.
       *
       * @param  directory        directory holding the index
       * @return                  the opened reader
       * @exception  IOException  if the reader cannot be opened
       */
      public static IndexReader getIndexReader(Directory directory) throws IOException {
          return IndexReader.open(directory);
      }

      /**
       * Creates an index writer for the given directory and analyzer.
       *
       * @param  index            directory holding the index
       * @param  analyzer         analyzer handed to the writer
       * @param  create           passed through unchanged to the IndexWriter
       *                          constructor
       * @return                  the newly constructed writer
       * @exception  IOException  if the writer cannot be constructed
       */
      public static IndexWriter getIndexWriter(Directory index, Analyzer analyzer, boolean create) throws IOException {
          return new IndexWriter(index, analyzer, create);
      }
  }
  
  
  
  
  1.1                  xml-cocoon2/src/blocks/lucene/java/org/apache/cocoon/components/search/LuceneCocoonIndexer.java
  
  Index: LuceneCocoonIndexer.java
  ===================================================================
  /*
  
   ============================================================================
                     The Apache Software License, Version 1.1
   ============================================================================
  
   Copyright (C) 1999-2002 The Apache Software Foundation. All rights reserved.
  
   Redistribution and use in source and binary forms, with or without modifica-
   tion, are permitted provided that the following conditions are met:
  
   1. Redistributions of  source code must  retain the above copyright  notice,
      this list of conditions and the following disclaimer.
  
   2. Redistributions in binary form must reproduce the above copyright notice,
      this list of conditions and the following disclaimer in the documentation
      and/or other materials provided with the distribution.
  
   3. The end-user documentation included with the redistribution, if any, must
      include  the following  acknowledgment:  "This product includes  software
      developed  by the  Apache Software Foundation  (http://www.apache.org/)."
      Alternately, this  acknowledgment may  appear in the software itself,  if
      and wherever such third-party acknowledgments normally appear.
  
   4. The names "Apache Cocoon" and  "Apache Software Foundation" must  not  be
      used to  endorse or promote  products derived from  this software without
      prior written permission. For written permission, please contact
      apache@apache.org.
  
   5. Products  derived from this software may not  be called "Apache", nor may
      "Apache" appear  in their name,  without prior written permission  of the
      Apache Software Foundation.
  
   THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
   INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
   FITNESS  FOR A PARTICULAR  PURPOSE ARE  DISCLAIMED.  IN NO  EVENT SHALL  THE
   APACHE SOFTWARE  FOUNDATION  OR ITS CONTRIBUTORS  BE LIABLE FOR  ANY DIRECT,
   INDIRECT, INCIDENTAL, SPECIAL,  EXEMPLARY, OR CONSEQUENTIAL  DAMAGES (INCLU-
   DING, BUT NOT LIMITED TO, PROCUREMENT  OF SUBSTITUTE GOODS OR SERVICES; LOSS
   OF USE, DATA, OR  PROFITS; OR BUSINESS  INTERRUPTION)  HOWEVER CAUSED AND ON
   ANY  THEORY OF LIABILITY,  WHETHER  IN CONTRACT,  STRICT LIABILITY,  OR TORT
   (INCLUDING  NEGLIGENCE OR  OTHERWISE) ARISING IN  ANY WAY OUT OF THE  USE OF
   THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  
   This software  consists of voluntary contributions made  by many individuals
   on  behalf of the Apache Software  Foundation and was  originally created by
   Stefano Mazzocchi  <st...@apache.org>. For more  information on the Apache
   Software Foundation, please see <http://www.apache.org/>.
  
  */
  package org.apache.cocoon.components.search;
  
  import org.apache.avalon.framework.component.Component;
  import org.apache.cocoon.ProcessingException;
  import org.apache.lucene.analysis.Analyzer;
  import org.apache.lucene.store.Directory;
  
  import java.net.URL;
  
  /**
   * The avalon behavioural component interface of an indexer.
   *
   * @author     <a href="mailto:berni_huber@a1.net">Bernhard Huber</a>
   * @version    CVS $Id: LuceneCocoonIndexer.java,v 1.1 2002/12/18 23:44:50 nicolaken Exp $
   */
  public interface LuceneCocoonIndexer extends Component
  {
      /**
       * Role name of this component; its value is the fully qualified name of
       * this interface.
       */
      String ROLE = "org.apache.cocoon.components.search.LuceneCocoonIndexer";


      /**
       * Sets the analyzer of this indexer.
       *
       * @param  analyzer  The new analyzer value
       */
      void setAnalyzer(Analyzer analyzer);


      /**
       * Indexes content into the given lucene directory.
       * NOTE(review): presumably <code>base_url</code> is the starting point
       * from which content to index is collected, and <code>create</code>
       * selects between creating a new index and updating an existing one;
       * confirm against the implementing class.
       *
       * @param  index                    the lucene directory receiving the index
       * @param  create                   flag controlling index creation (see note above)
       * @param  base_url                 URL the indexing refers to (see note above)
       * @exception  ProcessingException  declared for failures during indexing
       */
      void index(Directory index, boolean create, URL base_url)
               throws ProcessingException;
  }
  
  
  
  1.1                  xml-cocoon2/src/blocks/lucene/java/org/apache/cocoon/components/search/LuceneCocoonPager.java
  
  Index: LuceneCocoonPager.java
  ===================================================================
  /*
  
   ============================================================================
                     The Apache Software License, Version 1.1
   ============================================================================
  
   Copyright (C) 1999-2002 The Apache Software Foundation. All rights reserved.
  
   Redistribution and use in source and binary forms, with or without modifica-
   tion, are permitted provided that the following conditions are met:
  
   1. Redistributions of  source code must  retain the above copyright  notice,
      this list of conditions and the following disclaimer.
  
   2. Redistributions in binary form must reproduce the above copyright notice,
      this list of conditions and the following disclaimer in the documentation
      and/or other materials provided with the distribution.
  
   3. The end-user documentation included with the redistribution, if any, must
      include  the following  acknowledgment:  "This product includes  software
      developed  by the  Apache Software Foundation  (http://www.apache.org/)."
      Alternately, this  acknowledgment may  appear in the software itself,  if
      and wherever such third-party acknowledgments normally appear.
  
   4. The names "Apache Cocoon" and  "Apache Software Foundation" must  not  be
      used to  endorse or promote  products derived from  this software without
      prior written permission. For written permission, please contact
      apache@apache.org.
  
   5. Products  derived from this software may not  be called "Apache", nor may
      "Apache" appear  in their name,  without prior written permission  of the
      Apache Software Foundation.
  
   THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
   INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
   FITNESS  FOR A PARTICULAR  PURPOSE ARE  DISCLAIMED.  IN NO  EVENT SHALL  THE
   APACHE SOFTWARE  FOUNDATION  OR ITS CONTRIBUTORS  BE LIABLE FOR  ANY DIRECT,
   INDIRECT, INCIDENTAL, SPECIAL,  EXEMPLARY, OR CONSEQUENTIAL  DAMAGES (INCLU-
   DING, BUT NOT LIMITED TO, PROCUREMENT  OF SUBSTITUTE GOODS OR SERVICES; LOSS
   OF USE, DATA, OR  PROFITS; OR BUSINESS  INTERRUPTION)  HOWEVER CAUSED AND ON
   ANY  THEORY OF LIABILITY,  WHETHER  IN CONTRACT,  STRICT LIABILITY,  OR TORT
   (INCLUDING  NEGLIGENCE OR  OTHERWISE) ARISING IN  ANY WAY OUT OF THE  USE OF
   THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  
   This software  consists of voluntary contributions made  by many individuals
   on  behalf of the Apache Software  Foundation and was  originally created by
   Stefano Mazzocchi  <st...@apache.org>. For more  information on the Apache
   Software Foundation, please see <http://www.apache.org/>.
  
  */
  package org.apache.cocoon.components.search;
  
  import org.apache.lucene.document.Document;
  import org.apache.lucene.search.Hits;
  
  import java.io.IOException;
  import java.util.ArrayList;
  import java.util.ListIterator;
  import java.util.NoSuchElementException;
  
  /**
   * This class should help you to manage paging of hits.
   *
   * @author     <a href="mailto:berni_huber@a1.net">Bernhard Huber</a>
   * @version    CVS $Id: LuceneCocoonPager.java,v 1.1 2002/12/18 23:44:50 nicolaken Exp $
   */
  public class LuceneCocoonPager implements ListIterator
  {

      /**
       * Default maximum number of hits returned per page.
       */
      public final static int COUNT_OF_HITS_PER_PAGE_DEFAULT = 5;

      /**
       * Default index of the first hit to return.
       */
      public final static int HITS_INDEX_START_DEFAULT = 0;

      /**
       * Index of the first hit the next call to next() returns. After a page
       * has been returned by next() or previous() this is the exclusive end
       * index of that page.
       */
      int hitsIndex = HITS_INDEX_START_DEFAULT;

      /**
       * Maximum count of hits to return by next(), and previous()
       */
      int countOfHitsPerPage = COUNT_OF_HITS_PER_PAGE_DEFAULT;

      /**
       * Hits to iterate upon. Must be set (constructor or setHits) before any
       * method reading the hits is called.
       */
      private Hits hits;

      /**
       * Number of hits in the page most recently returned by next() or
       * previous(), or -1 if no page has been returned since the position was
       * last set. The last page of a result may hold fewer than
       * countOfHitsPerPage hits, so the start of the current page cannot be
       * derived from hitsIndex and countOfHitsPerPage alone.
       */
      private int lastPageSize = -1;


      /**
       * Creates a pager positioned at the first hit of the given hits.
       *
       * @param  hits  the hits to page through
       */
      public LuceneCocoonPager(Hits hits) {
          setHits(hits);
      }


      /**
       * Creates a pager without hits; setHits must be called before paging.
       */
      public LuceneCocoonPager() { }


      /**
       * Sets the hits to page through and rewinds to the default start index.
       *
       * @param  hits  The new hits value
       */
      public void setHits(Hits hits) {
          this.hits = hits;
          this.hitsIndex = HITS_INDEX_START_DEFAULT;
          this.lastPageSize = -1;
      }


      /**
       * Set count of hits displayed per single page
       *
       * @param  countOfHitsPerPage  The new countOfHitsPerPage value
       */
      public void setCountOfHitsPerPage(int countOfHitsPerPage) {
          this.countOfHitsPerPage = countOfHitsPerPage;
      }


      /**
       * Set starting index for retrieving hits
       *
       * @param  start_index  index of the first hit the next call to next()
       *                      returns
       */
      public void setStartIndex(int start_index) {
          this.hitsIndex = start_index;
          // No page has been delivered from this position yet.
          this.lastPageSize = -1;
      }


      /**
       * Replaces the last element returned by next or previous with the
       * specified element (optional operation). Not supported by this pager.
       *
       * @param  o  ignored
       * @exception  UnsupportedOperationException  always
       */
      public void set(Object o) {
          throw new UnsupportedOperationException();
      }


      /**
       * Get count of hits
       *
       * @return    The count of hits
       */
      public int getCountOfHits() {
          return hits.length();
      }

      /**
       * Get count of hits displayed per single page
       *
       * @return    The countOfHitsPerPage value
       */
      public int getCountOfHitsPerPage() {
          return this.countOfHitsPerPage;
      }

      /**
       * Calculate count of pages for displaying all hits. A trailing partial
       * page counts as a full page.
       *
       * @return    The countOfPages value
       */
      public int getCountOfPages() {
          int count_of_pages = hits.length() / this.countOfHitsPerPage;
          int remainder = hits.length() % this.countOfHitsPerPage;
          if (remainder != 0) {
              count_of_pages += 1;
          }
          return count_of_pages;
      }


      /**
       * Get starting index for retrieving hits
       *
       * @return    index of the first hit the next call to next() returns
       */
      public int getStartIndex() {
          return this.hitsIndex;
      }


      /**
       * Inserts the specified element into the list (optional operation).
       * Not supported by this pager.
       *
       * @param  o                                  ignored
       * @exception  UnsupportedOperationException  always
       */
      public void add(Object o) throws UnsupportedOperationException {
          throw new UnsupportedOperationException();
      }


      /**
       * Returns true if there is at least one more hit for next() to return.
       *
       * @return    true if hitsIndex is below the count of hits
       */
      public boolean hasNext() {
          return hitsIndex < hits.length();
      }


      /**
       * Returns true if a page exists before the current page, i.e. if the
       * current page does not start at the first hit.
       *
       * @return    true if previous() has a page to return
       */
      public boolean hasPrevious() {
          return currentPageStart() > 0;
      }


      /**
       * Returns the next page of hits and advances past it.
       *
       * @return    an ArrayList of HitWrapper objects holding at most
       *            countOfHitsPerPage hits
       * @exception  NoSuchElementException  if there is no further hit, or
       *            reading a hit fails
       */
      public Object next() {
          int pageStart = hitsIndex;
          int endIndex = Math.min(hits.length(), hitsIndex + countOfHitsPerPage);
          if (hitsIndex >= endIndex) {
              throw new NoSuchElementException();
          }

          ArrayList hitsPerPageList = new ArrayList();
          while (hitsIndex < endIndex) {
              try {
                  HitWrapper hit_wrapper = new HitWrapper(
                          hits.score(hitsIndex),
                          hits.doc(hitsIndex));
                  hitsPerPageList.add(hit_wrapper);
              } catch (IOException ioe) {
                  throw new NoSuchElementException("no more hits: " + ioe.getMessage());
              }
              hitsIndex++;
          }
          // Remember the real page size; the last page may be a partial one.
          lastPageSize = endIndex - pageStart;
          return hitsPerPageList;
      }


      /**
       * Returns the start index of the page a subsequent call to next()
       * returns, capped at the count of hits.
       *
       * @return    index of the first hit of the next page
       */
      public int nextIndex() {
          return Math.min(hitsIndex, hits.length());
      }


      /**
       * Returns the page of hits located before the current page and makes it
       * the current page.
       *
       * @return    an ArrayList of HitWrapper objects holding at most
       *            countOfHitsPerPage hits
       * @exception  NoSuchElementException  if there is no previous page, or
       *            reading a hit fails
       */
      public Object previous() {
          int currentStart = currentPageStart();
          int startIndex = Math.max(0, currentStart - countOfHitsPerPage);
          // The end index is exclusive, hence capped at the count of hits.
          int endIndex = Math.min(hits.length(), currentStart);
          if (startIndex >= endIndex) {
              throw new NoSuchElementException();
          }

          ArrayList hitsPerPageList = new ArrayList();
          for (int i = startIndex; i < endIndex; i++) {
              try {
                  HitWrapper hit_wrapper = new HitWrapper(
                          hits.score(i),
                          hits.doc(i));
                  hitsPerPageList.add(hit_wrapper);
              } catch (IOException ioe) {
                  throw new NoSuchElementException("no more hits: " + ioe.getMessage());
              }
          }
          hitsIndex = endIndex;
          lastPageSize = endIndex - startIndex;
          return hitsPerPageList;
      }


      /**
       * Returns the start index of the page a subsequent call to previous()
       * returns, never less than zero.
       *
       * @return    index of the first hit of the previous page
       */
      public int previousIndex() {
          return Math.max(0, currentPageStart() - countOfHitsPerPage);
      }


      /**
       * Removes from the list the last element that was returned by next or
       * previous (optional operation). Not supported by this pager.
       *
       * @exception  UnsupportedOperationException  always
       */
      public void remove() {
          throw new UnsupportedOperationException();
      }


      /**
       * Calculates the start index of the current page, i.e. the page most
       * recently returned by next() or previous(). If no page has been
       * returned since the position was set, a full page is assumed.
       *
       * @return    start index of the current page; may be negative
       */
      private int currentPageStart() {
          int pageSize = lastPageSize >= 0 ? lastPageSize : countOfHitsPerPage;
          return hitsIndex - pageSize;
      }


      /**
       * A helper class encapsulating found document, and its score
       *
       * @author     <a href="mailto:berni_huber@a1.net">Bernhard Huber</a>
       * @version    CVS $Id: LuceneCocoonPager.java,v 1.1 2002/12/18 23:44:50 nicolaken Exp $
       */
      public static class HitWrapper
      {
          float score;
          Document document;


          /**
           * Creates a wrapper for a single hit.
           *
           * @param  score     score of the hit
           * @param  document  document of the hit
           */
          public HitWrapper(float score, Document document) {
              this.document = document;
              this.score = score;
          }


          /**
           * Returns the wrapped document.
           *
           * @return    The document value
           */
          public Document getDocument() {
              return document;
          }


          /**
           * Returns the score of the wrapped hit.
           *
           * @return    The score value
           */
          public float getScore() {
              return score;
          }


          /**
           * Looks up a field of the wrapped document by delegating to
           * <code>document.get(field)</code>.
           *
           * @param  field  name of the document field
           * @return        the value returned by the document for that field
           */
          public String getField(String field) {
              return document.get(field);
          }
      }

  }
  
  
  
  
  1.1                  xml-cocoon2/src/blocks/lucene/java/org/apache/cocoon/components/search/LuceneCocoonSearcher.java
  
  Index: LuceneCocoonSearcher.java
  ===================================================================
  /*
  
   ============================================================================
                     The Apache Software License, Version 1.1
   ============================================================================
  
   Copyright (C) 1999-2002 The Apache Software Foundation. All rights reserved.
  
   Redistribution and use in source and binary forms, with or without modifica-
   tion, are permitted provided that the following conditions are met:
  
   1. Redistributions of  source code must  retain the above copyright  notice,
      this list of conditions and the following disclaimer.
  
   2. Redistributions in binary form must reproduce the above copyright notice,
      this list of conditions and the following disclaimer in the documentation
      and/or other materials provided with the distribution.
  
   3. The end-user documentation included with the redistribution, if any, must
      include  the following  acknowledgment:  "This product includes  software
      developed  by the  Apache Software Foundation  (http://www.apache.org/)."
      Alternately, this  acknowledgment may  appear in the software itself,  if
      and wherever such third-party acknowledgments normally appear.
  
   4. The names "Apache Cocoon" and  "Apache Software Foundation" must  not  be
      used to  endorse or promote  products derived from  this software without
      prior written permission. For written permission, please contact
      apache@apache.org.
  
   5. Products  derived from this software may not  be called "Apache", nor may
      "Apache" appear  in their name,  without prior written permission  of the
      Apache Software Foundation.
  
   THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
   INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
   FITNESS  FOR A PARTICULAR  PURPOSE ARE  DISCLAIMED.  IN NO  EVENT SHALL  THE
   APACHE SOFTWARE  FOUNDATION  OR ITS CONTRIBUTORS  BE LIABLE FOR  ANY DIRECT,
   INDIRECT, INCIDENTAL, SPECIAL,  EXEMPLARY, OR CONSEQUENTIAL  DAMAGES (INCLU-
   DING, BUT NOT LIMITED TO, PROCUREMENT  OF SUBSTITUTE GOODS OR SERVICES; LOSS
   OF USE, DATA, OR  PROFITS; OR BUSINESS  INTERRUPTION)  HOWEVER CAUSED AND ON
   ANY  THEORY OF LIABILITY,  WHETHER  IN CONTRACT,  STRICT LIABILITY,  OR TORT
   (INCLUDING  NEGLIGENCE OR  OTHERWISE) ARISING IN  ANY WAY OUT OF THE  USE OF
   THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  
   This software  consists of voluntary contributions made  by many individuals
   on  behalf of the Apache Software  Foundation and was  originally created by
   Stefano Mazzocchi  <st...@apache.org>. For more  information on the Apache
   Software Foundation, please see <http://www.apache.org/>.
  
  */
  package org.apache.cocoon.components.search;
  
  import org.apache.avalon.framework.component.Component;
  import org.apache.cocoon.ProcessingException;
  import org.apache.lucene.analysis.Analyzer;
  import org.apache.lucene.search.Hits;
  import org.apache.lucene.store.Directory;
  
  /**
   * The avalon behavioural component interface of a searcher.
   * <p>
   *   This component defines an interface for searching.
   *   The idea is to abstract the process of searching: given a query string
   *   and an index, generate the hits which match the query string in the index.
   * </p>
   *
   * @author     <a href="mailto:berni_huber@a1.net">Bernhard Huber</a>
   * @version    CVS $Id: LuceneCocoonSearcher.java,v 1.1 2002/12/18 23:44:50 nicolaken Exp $
   */
  public interface LuceneCocoonSearcher extends Component
  {
      /**
       * The ROLE name of this avalon component.
       * <p>
       *   Its value is the fully qualified name of this interface,
       *   i.e. <code>org.apache.cocoon.components.search.LuceneCocoonSearcher</code>.
       * </p>
       */
      String ROLE = "org.apache.cocoon.components.search.LuceneCocoonSearcher";


      /**
       * Sets the analyzer attribute of the LuceneCocoonSearcher object.
       * <p>
       *   The analyzer determines the tokenization of the query,
       *   and the strategy of matching.
       * </p>
       * <p>
       *   The analyzer class defined here should be equivalent to the analyzer
       *   class used when creating the index used for searching.
       * </p>
       *
       * @param  analyzer  The new analyzer value
       */
      void setAnalyzer(Analyzer analyzer);


      /**
       * Sets the directory attribute of the LuceneCocoonSearcher object.
       * <p>
       *   The directory specifies where the index is looked up,
       *   i.e. it defines the physical place of the index.
       * </p>
       *
       * @param  directory  The new directory value
       */
      void setDirectory(Directory directory);


      /**
       * Searches for a query string, returning zero or more hits.
       *
       * @param  query_string             A query string parsable by a query parser.
       * @param  default_field            The default field of the query string.
       * @return                          zero or more hits matching the query string
       * @exception  ProcessingException  thrown due to processing errors while
       *   looking up the index directory, parsing the query string, or generating the hits.
       */
      Hits search(String query_string, String default_field) throws ProcessingException;
  }
  
  
  
  
  1.1                  xml-cocoon2/src/blocks/lucene/java/org/apache/cocoon/components/search/LuceneIndexContentHandler.java
  
  Index: LuceneIndexContentHandler.java
  ===================================================================
  /*
  
   ============================================================================
                     The Apache Software License, Version 1.1
   ============================================================================
  
   Copyright (C) 1999-2002 The Apache Software Foundation. All rights reserved.
  
   Redistribution and use in source and binary forms, with or without modifica-
   tion, are permitted provided that the following conditions are met:
  
   1. Redistributions of  source code must  retain the above copyright  notice,
      this list of conditions and the following disclaimer.
  
   2. Redistributions in binary form must reproduce the above copyright notice,
      this list of conditions and the following disclaimer in the documentation
      and/or other materials provided with the distribution.
  
   3. The end-user documentation included with the redistribution, if any, must
      include  the following  acknowledgment:  "This product includes  software
      developed  by the  Apache Software Foundation  (http://www.apache.org/)."
      Alternately, this  acknowledgment may  appear in the software itself,  if
      and wherever such third-party acknowledgments normally appear.
  
   4. The names "Apache Cocoon" and  "Apache Software Foundation" must  not  be
      used to  endorse or promote  products derived from  this software without
      prior written permission. For written permission, please contact
      apache@apache.org.
  
   5. Products  derived from this software may not  be called "Apache", nor may
      "Apache" appear  in their name,  without prior written permission  of the
      Apache Software Foundation.
  
   THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
   INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
   FITNESS  FOR A PARTICULAR  PURPOSE ARE  DISCLAIMED.  IN NO  EVENT SHALL  THE
   APACHE SOFTWARE  FOUNDATION  OR ITS CONTRIBUTORS  BE LIABLE FOR  ANY DIRECT,
   INDIRECT, INCIDENTAL, SPECIAL,  EXEMPLARY, OR CONSEQUENTIAL  DAMAGES (INCLU-
   DING, BUT NOT LIMITED TO, PROCUREMENT  OF SUBSTITUTE GOODS OR SERVICES; LOSS
   OF USE, DATA, OR  PROFITS; OR BUSINESS  INTERRUPTION)  HOWEVER CAUSED AND ON
   ANY  THEORY OF LIABILITY,  WHETHER  IN CONTRACT,  STRICT LIABILITY,  OR TORT
   (INCLUDING  NEGLIGENCE OR  OTHERWISE) ARISING IN  ANY WAY OUT OF THE  USE OF
   THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  
   This software  consists of voluntary contributions made  by many individuals
   on  behalf of the Apache Software  Foundation and was  originally created by
   Stefano Mazzocchi  <st...@apache.org>. For more  information on the Apache
   Software Foundation, please see <http://www.apache.org/>.
  
  */
  package org.apache.cocoon.components.search;
  
  import org.apache.lucene.document.Document;
  import org.apache.lucene.document.Field;
  import org.xml.sax.Attributes;
  import org.xml.sax.ContentHandler;
  import org.xml.sax.Locator;
  import org.xml.sax.helpers.AttributesImpl;
  
  import java.util.ArrayList;
  import java.util.HashSet;
  import java.util.Iterator;
  import java.util.List;
  import java.util.Stack;
  
  /**
   * A SAX <code>ContentHandler</code> which parses XML and generates
   * lucene document(s).
   * <p>
   *   All character data is collected into one body text, which is added to
   *   the body document in <code>endDocument()</code>. In addition every
   *   element with text content becomes a field named after the element, and
   *   every attribute becomes a field named <code>element@attribute</code>.
   * </p>
   * <p>
   *   It can be configured via SimpleLuceneXMLIndexerImpl
   *   to store specific tags in Lucene, so that you can
   *   display them with hits (see <code>setFieldTags()</code>).
   * </p>
   *
   * @author     <a href="mailto:berni_huber@a1.net">Bernhard Huber</a>
   * @author     <a href="mailto:jeremy@apache.org">Jeremy Quinn</a>
   * @version    CVS $Id: LuceneIndexContentHandler.java,v 1.1 2002/12/18 23:44:50 nicolaken Exp $
   */
  public class LuceneIndexContentHandler implements ContentHandler
  {
      /** Namespace URI of the attributes which steer the indexing. */
      public static final String LUCENE_URI = "http://apache.org/cocoon/lucene/1.0";

      /**
       * If this attribute (in the {@link #LUCENE_URI} namespace) is specified
       * on an element, the values of all other attributes of this element are
       * added to the text of the element, and to the document body text.
       */
      public static final String LUCENE_ATTR_TO_TEXT_ATTRIBUTE = "text-attr";

      /** Collects the character data of the whole document. */
      StringBuffer bodyText;
      /** The generated documents; holds the single body document. */
      private List documents;
      /** The document receiving all generated fields. */
      private Document bodyDocument;
      /** One IndexHelperField per currently open element. */
      private Stack elementStack;
      /** Local names of the elements whose text is stored as well; may be null. */
      private HashSet fieldTags;

      /**
       * Creates a handler with an empty body document and no field tags.
       */
      public LuceneIndexContentHandler() {
          this.bodyText = new StringBuffer();
          this.bodyDocument = new Document();
          this.documents = new ArrayList();
          this.documents.add(this.bodyDocument);
          this.elementStack = new Stack();
          this.fieldTags = new HashSet();
      }

      /**
       * Sets the local names of the elements whose text is stored in the
       * document in addition to being indexed.
       *
       * @param  fieldTags  the set of element local names; <code>null</code>
       *   is treated like an empty set
       */
      public void setFieldTags(HashSet fieldTags) {
          this.fieldTags = fieldTags;
      }

      /**
       * Ignored, the locator is not used by this handler.
       *
       * @param  locator  the document locator
       */
      public void setDocumentLocator(Locator locator) { }


      /**
       * Returns the generated documents.
       *
       * @return    the internal list of documents; it holds the body document
       */
      public List allDocuments() {
          return documents;
      }


      /**
       * Returns an iterator over the generated documents.
       *
       * @return    an iterator over the list returned by <code>allDocuments()</code>
       */
      public Iterator iterator() {
          return documents.iterator();
      }


      /**
       * Appends character data to the text of the innermost open element,
       * if there is one, and to the body text.
       * <p>
       *   Every non-empty chunk is taken, including chunks of a single
       *   character: a SAX parser may split character data into chunks of any
       *   size, and an element may legitimately contain just one character.
       * </p>
       *
       * @param  ch      the character buffer
       * @param  start   the index of the first character to use
       * @param  length  the number of characters to use
       */
      public void characters(char[] ch, int start, int length) {
          if (ch.length > 0 && start >= 0 && length > 0) {
              String text = new String(ch, start, length);
              if (elementStack.size() > 0) {
                  IndexHelperField tos = (IndexHelperField) elementStack.peek();
                  tos.appendText(text);
              }
              bodyText.append(text);
          }
      }


      /**
       * Adds the collected body text to the body document as the field
       * <code>LuceneXMLIndexer.BODY_FIELD</code>.
       */
      public void endDocument() {
          bodyDocument.add(Field.UnStored(LuceneXMLIndexer.BODY_FIELD, bodyText.toString()));
      }


      /**
       * Closes the innermost open element and adds its fields to the body
       * document.
       * <p>
       *   Every attribute outside of the {@link #LUCENE_URI} namespace is
       *   added as a field named <code>element@attribute</code>. If the
       *   element carries the {@link #LUCENE_ATTR_TO_TEXT_ATTRIBUTE}
       *   attribute, these attribute values are also appended to the text of
       *   the element and to the body text. Finally the text of the element,
       *   if not empty, is added as a field named after the element; for
       *   field tags it is added a second time via <code>Field.UnIndexed</code>.
       * </p>
       *
       * @param  namespaceURI  not used, the values of startElement are taken
       * @param  localName     not used, the values of startElement are taken
       * @param  qName         not used, the values of startElement are taken
       */
      public void endElement(String namespaceURI, String localName, String qName) {
          IndexHelperField tos = (IndexHelperField) elementStack.pop();
          String lname = tos.getLocalFieldName();
          StringBuffer text = tos.getText();

          // (VG): Atts are never null, see startElement
          Attributes atts = tos.getAttributes();
          boolean attributesToText = atts.getIndex(LUCENE_URI, LUCENE_ATTR_TO_TEXT_ATTRIBUTE) != -1;
          for (int i = 0; i < atts.getLength(); i++) {
              // attributes of the lucene namespace only steer the indexing
              if (LUCENE_URI.equals(atts.getURI(i))) {
                  continue;
              }

              String atts_lname = atts.getLocalName(i);
              String atts_value = atts.getValue(i);
              bodyDocument.add(Field.UnStored(lname + "@" + atts_lname, atts_value));
              if (attributesToText) {
                  // same null guard as for the element field below
                  if (text != null) {
                      text.append(atts_value);
                      text.append(' ');
                  }
                  bodyText.append(atts_value);
                  bodyText.append(' ');
              }
          }

          if (text != null && text.length() > 0) {
              String value = text.toString();
              if (isFieldTag(lname)) {
                  bodyDocument.add(Field.UnIndexed(lname, value));
              }
              bodyDocument.add(Field.UnStored(lname, value));
          }
      }


      /**
       * Ignored.
       *
       * @param  prefix  the namespace prefix
       */
      public void endPrefixMapping(String prefix) { }


      /**
       * Ignored, ignorable whitespace is not indexed.
       *
       * @param  ch      the character buffer
       * @param  start   the index of the first character
       * @param  length  the number of characters
       */
      public void ignorableWhitespace(char[] ch, int start, int length) { }


      /**
       * Ignored, processing instructions are not indexed.
       *
       * @param  target  the target of the processing instruction
       * @param  data    the data of the processing instruction
       */
      public void processingInstruction(String target, String data) { }


      /**
       * Ignored.
       *
       * @param  name  the name of the skipped entity
       */
      public void skippedEntity(String name) { }


      /**
       * Does nothing, all state is set up by the constructor.
       */
      public void startDocument() { }


      /**
       * Opens an element: pushes a new IndexHelperField holding the names
       * and a copy of the attributes onto the element stack.
       *
       * @param  namespaceURI  not used
       * @param  localName     the local name of the element
       * @param  qName         the qualified name of the element
       * @param  atts          the attributes of the element; copied, as they
       *   are evaluated later, in endElement
       */
      public void startElement(String namespaceURI, String localName, String qName, Attributes atts) {
          IndexHelperField ihf = new IndexHelperField(localName, qName, new AttributesImpl(atts));
          elementStack.push(ihf);
      }


      /**
       * Ignored.
       *
       * @param  prefix  the namespace prefix
       * @param  uri     the namespace URI
       */
      public void startPrefixMapping(String prefix, String uri) { }

      /**
       * Checks if a tag is a candidate for making into a stored field.
       *
       * @param  tag  local name of the tag we are processing
       * @return      true iff field tags are set and contain the tag
       */
      private boolean isFieldTag(String tag) {
          // by default do not make field
          return fieldTags != null && fieldTags.contains(tag);
      }
  }
  
  
  
  1.1                  xml-cocoon2/src/blocks/lucene/java/org/apache/cocoon/components/search/LuceneXMLIndexer.java
  
  Index: LuceneXMLIndexer.java
  ===================================================================
  /*
  
   ============================================================================
                     The Apache Software License, Version 1.1
   ============================================================================
  
   Copyright (C) 1999-2002 The Apache Software Foundation. All rights reserved.
  
   Redistribution and use in source and binary forms, with or without modifica-
   tion, are permitted provided that the following conditions are met:
  
   1. Redistributions of  source code must  retain the above copyright  notice,
      this list of conditions and the following disclaimer.
  
   2. Redistributions in binary form must reproduce the above copyright notice,
      this list of conditions and the following disclaimer in the documentation
      and/or other materials provided with the distribution.
  
   3. The end-user documentation included with the redistribution, if any, must
      include  the following  acknowledgment:  "This product includes  software
      developed  by the  Apache Software Foundation  (http://www.apache.org/)."
      Alternately, this  acknowledgment may  appear in the software itself,  if
      and wherever such third-party acknowledgments normally appear.
  
   4. The names "Apache Cocoon" and  "Apache Software Foundation" must  not  be
      used to  endorse or promote  products derived from  this software without
      prior written permission. For written permission, please contact
      apache@apache.org.
  
   5. Products  derived from this software may not  be called "Apache", nor may
      "Apache" appear  in their name,  without prior written permission  of the
      Apache Software Foundation.
  
   THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
   INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
   FITNESS  FOR A PARTICULAR  PURPOSE ARE  DISCLAIMED.  IN NO  EVENT SHALL  THE
   APACHE SOFTWARE  FOUNDATION  OR ITS CONTRIBUTORS  BE LIABLE FOR  ANY DIRECT,
   INDIRECT, INCIDENTAL, SPECIAL,  EXEMPLARY, OR CONSEQUENTIAL  DAMAGES (INCLU-
   DING, BUT NOT LIMITED TO, PROCUREMENT  OF SUBSTITUTE GOODS OR SERVICES; LOSS
   OF USE, DATA, OR  PROFITS; OR BUSINESS  INTERRUPTION)  HOWEVER CAUSED AND ON
   ANY  THEORY OF LIABILITY,  WHETHER  IN CONTRACT,  STRICT LIABILITY,  OR TORT
   (INCLUDING  NEGLIGENCE OR  OTHERWISE) ARISING IN  ANY WAY OUT OF THE  USE OF
   THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  
   This software  consists of voluntary contributions made  by many individuals
   on  behalf of the Apache Software  Foundation and was  originally created by
   Stefano Mazzocchi  <st...@apache.org>. For more  information on the Apache
   Software Foundation, please see <http://www.apache.org/>.
  
  */
  package org.apache.cocoon.components.search;
  
  import org.apache.avalon.framework.component.Component;
  import org.apache.cocoon.ProcessingException;
  
  import java.net.URL;
  import java.util.List;
  
  /**
   * The avalon behavioural component interface of generating
   * lucene documents from an xml content.
   *
   * <p>
   *  The well-known fields of a lucene document are defined as
   *  <code>*_FIELD</code> constants.
   * </p>
   * <p>
   *  You trigger the generating of lucene documents via
   *  <code>build()</code>.
   * </p>
   * <p>
   *  NOTE(review): earlier wording referred to <code>allDocuments()</code>
   *  and <code>iterator()</code> for accessing the generated documents; this
   *  interface declares neither, it only declares <code>build()</code>
   *  returning a <code>List</code> - confirm the intended access path.
   * </p>
   *
   * @author     <a href="mailto:berni_huber@a1.net">Bernhard Huber</a>
   * @version    CVS $Id: LuceneXMLIndexer.java,v 1.1 2002/12/18 23:44:50 nicolaken Exp $
   */
  public interface LuceneXMLIndexer extends Component
  {

      /**
       * The ROLE name of this avalon component.
       * <p>
       *   Its value is the fully qualified name of this interface,
       *   i.e. <code>org.apache.cocoon.components.search.LuceneXMLIndexer</code>.
       * </p>
       */
      String ROLE = "org.apache.cocoon.components.search.LuceneXMLIndexer";

      /**
       * A Lucene document field name, containing the xml content text of all xml elements.
       * <p>
       *   A concrete implementation of this interface SHOULD
       *   provide a field named body.
       * </p>
       * <p>
       *   A concrete implementation MAY provide additional lucene
       *   document fields.
       * </p>
       */
      String BODY_FIELD = "body";

      /**
       * A Lucene document field name, containing the URI/URL of the indexed
       * document.
       * <p>
       *   A concrete implementation of this interface SHOULD
       *   provide a field named url.
       * </p>
       */
      String URL_FIELD = "url";

      /**
       * A Lucene document field name, containing a unique key of the indexed
       * document.
       * <p>
       *  This document field is used internally to track document
       *  changes, and updates.
       * </p>
       * <p>
       *   A concrete implementation of this interface SHOULD
       *   provide a field named uid.
       * </p>
       */
      String UID_FIELD = "uid";

      /**
       * Build lucene documents from a URL.
       * <p>
       *   This method will read the content of the URL, and generate
       *   one or more lucene documents.
       * </p>
       *
       * @param  url                      the content of this url gets indexed.
       * @return                          a list, presumably of the generated lucene
       *   documents - confirm against the implementation
       * @exception  ProcessingException  declared for failures of the build;
       *   the exact conditions depend on the implementation
       */
      List build(URL url) throws ProcessingException;
  }
  
  
  
  1.1                  xml-cocoon2/src/blocks/lucene/java/org/apache/cocoon/components/search/package.html
  
  Index: package.html
  ===================================================================
  <html>
  <head>
    <title>Search</title>
  </head>
  <body>
    <h1>Search - indexing and searching of xml content</h1>
    <p>
      The search package provides interfaces and methods for 
      indexing, and for searching.
    </p>
    <p>
      The indexing part builds a search index. The search part
      searches through the search index.
    </p>
    <p>
      See the document userdocs/concepts/xmlsearching.html
      - XML Searching
    </p>
  </body>
  </html>
  
  
  
  1.1                  xml-cocoon2/src/blocks/lucene/java/org/apache/cocoon/components/search/SimpleLuceneCocoonIndexerImpl.java
  
  Index: SimpleLuceneCocoonIndexerImpl.java
  ===================================================================
  /*
  
   ============================================================================
                     The Apache Software License, Version 1.1
   ============================================================================
  
   Copyright (C) 1999-2002 The Apache Software Foundation. All rights reserved.
  
   Redistribution and use in source and binary forms, with or without modifica-
   tion, are permitted provided that the following conditions are met:
  
   1. Redistributions of  source code must  retain the above copyright  notice,
      this list of conditions and the following disclaimer.
  
   2. Redistributions in binary form must reproduce the above copyright notice,
      this list of conditions and the following disclaimer in the documentation
      and/or other materials provided with the distribution.
  
   3. The end-user documentation included with the redistribution, if any, must
      include  the following  acknowledgment:  "This product includes  software
      developed  by the  Apache Software Foundation  (http://www.apache.org/)."
      Alternately, this  acknowledgment may  appear in the software itself,  if
      and wherever such third-party acknowledgments normally appear.
  
   4. The names "Apache Cocoon" and  "Apache Software Foundation" must  not  be
      used to  endorse or promote  products derived from  this software without
      prior written permission. For written permission, please contact
      apache@apache.org.
  
   5. Products  derived from this software may not  be called "Apache", nor may
      "Apache" appear  in their name,  without prior written permission  of the
      Apache Software Foundation.
  
   THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
   INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
   FITNESS  FOR A PARTICULAR  PURPOSE ARE  DISCLAIMED.  IN NO  EVENT SHALL  THE
   APACHE SOFTWARE  FOUNDATION  OR ITS CONTRIBUTORS  BE LIABLE FOR  ANY DIRECT,
   INDIRECT, INCIDENTAL, SPECIAL,  EXEMPLARY, OR CONSEQUENTIAL  DAMAGES (INCLU-
   DING, BUT NOT LIMITED TO, PROCUREMENT  OF SUBSTITUTE GOODS OR SERVICES; LOSS
   OF USE, DATA, OR  PROFITS; OR BUSINESS  INTERRUPTION)  HOWEVER CAUSED AND ON
   ANY  THEORY OF LIABILITY,  WHETHER  IN CONTRACT,  STRICT LIABILITY,  OR TORT
   (INCLUDING  NEGLIGENCE OR  OTHERWISE) ARISING IN  ANY WAY OUT OF THE  USE OF
   THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  
   This software  consists of voluntary contributions made  by many individuals
   on  behalf of the Apache Software  Foundation and was  originally created by
   Stefano Mazzocchi  <st...@apache.org>. For more  information on the Apache
   Software Foundation, please see <http://www.apache.org/>.
  
  */
  package org.apache.cocoon.components.search;
  
  import org.apache.avalon.framework.activity.Disposable;
  import org.apache.avalon.framework.component.ComponentException;
  import org.apache.avalon.framework.component.ComponentManager;
  import org.apache.avalon.framework.component.Composable;
  import org.apache.avalon.framework.configuration.Configurable;
  import org.apache.avalon.framework.configuration.Configuration;
  import org.apache.avalon.framework.configuration.ConfigurationException;
  import org.apache.avalon.framework.logger.AbstractLogEnabled;
  import org.apache.cocoon.ProcessingException;
  import org.apache.cocoon.components.crawler.CocoonCrawler;
  import org.apache.lucene.analysis.Analyzer;
  import org.apache.lucene.document.Document;
  import org.apache.lucene.index.IndexReader;
  import org.apache.lucene.index.IndexWriter;
  import org.apache.lucene.index.Term;
  import org.apache.lucene.index.TermEnum;
  import org.apache.lucene.store.Directory;
  
  import java.io.IOException;
  import java.net.URL;
  import java.util.Iterator;
  
  /**
   * A lucene indexer.
   *
   * <p>
   *  XML documents are indexed using lucene.
   *  Links to XML documents are supplied by
   *  a crawler, requesting links of documents by specifying a cocoon-view, and
   *  HTTP protocol.
   * </p>
   *
   * @author     <a href="mailto:berni_huber@a1.net">Bernhard Huber</a>
   * @version CVS $Id: SimpleLuceneCocoonIndexerImpl.java,v 1.1 2002/12/18 23:44:51 nicolaken Exp $
   */
  public class SimpleLuceneCocoonIndexerImpl extends AbstractLogEnabled
           implements LuceneCocoonIndexer, Configurable, Composable, Disposable
  {
  
      /**
       * Name of the configuration child element which <code>configure()</code>
       * looks up for the analyzer class name.
       */
      public final static String ANALYZER_CLASSNAME_CONFIG = "analyzer-classname";
      /**
       * The analyzer class name used if none is configured.
       */
      public final static String ANALYZER_CLASSNAME_DEFAULT = "org.apache.lucene.analysis.standard.StandardAnalyzer";

      /**
       * Configuration name of the index directory.
       * NOTE(review): not read by <code>configure()</code> of this class -
       * confirm where it is used.
       */
      public final static String DIRECTORY_CONFIG = "directory";
      /**
       * The default index directory: none.
       */
      public final static String DIRECTORY_DEFAULT = null;

      /**
       * Name of the configuration child element which <code>configure()</code>
       * looks up for the merge factor.
       */
      public final static String MERGE_FACTOR_CONFIG = "merge-factor";

      /**
       * The merge factor used if none is configured, see
       * http://www.mail-archive.com/lucene-user@jakarta.apache.org/msg00373.html
       */
      public final static int MERGE_FACTOR_DEFAULT = 10;

      /**
       * The component manager instance, set by <code>compose()</code> and
       * used by <code>index()</code> to look up components.
       */
      protected ComponentManager manager = null;

      // the analyzer handed to the IndexWriter in index(); set via setAnalyzer()
      Analyzer analyzer;
      // analyzer class name, initially ANALYZER_CLASSNAME_DEFAULT; may be replaced in configure()
      private String analyzerClassnameDefault = ANALYZER_CLASSNAME_DEFAULT;
      // index directory name, initially DIRECTORY_DEFAULT (null)
      private String directoryDefault = DIRECTORY_DEFAULT;
      // assigned to the mergeFactor of the IndexWriter in index()
      private int mergeFactor = MERGE_FACTOR_DEFAULT;
  
  
      /**
       * Sets the analyzer which <code>index()</code> hands to the
       * <code>IndexWriter</code>.
       *
       * @param  analyzer  The new analyzer value
       */
      public void setAnalyzer(Analyzer analyzer) {
          this.analyzer = analyzer;
      }
  
  
      /**
       * Configures the analyzer class name, and the merge factor.
       * <p>
       *   Both settings are optional child elements of the configuration,
       *   named <code>ANALYZER_CLASSNAME_CONFIG</code>, and
       *   <code>MERGE_FACTOR_CONFIG</code>. A setting which has no child
       *   element keeps its current value.
       * </p>
       *
       * @param  conf                        the configuration of this component
       * @exception  ConfigurationException  declared by the
       *   <code>Configurable</code> contract
       */
      public void configure(Configuration conf) throws ConfigurationException {
          // The values have to be read from the child element: reading them
          // from conf yields the value of the parent element, i.e. the
          // configured setting would never be picked up.
          Configuration child = conf.getChild(ANALYZER_CLASSNAME_CONFIG, false);
          if (child != null) {
              String value = child.getValue(ANALYZER_CLASSNAME_DEFAULT);
              if (value != null) {
                  analyzerClassnameDefault = value;
              }
          }

          child = conf.getChild(MERGE_FACTOR_CONFIG, false);
          if (child != null) {
              mergeFactor = child.getValueAsInteger(MERGE_FACTOR_DEFAULT);
          }
      }
  
  
      /**
       * Set the current <code>ComponentManager</code> instance used by this
       * <code>Composable</code>.
       *
       * @param  manager                 the component manager, stored for the
       *   component lookups done in <code>index()</code>
       * @exception  ComponentException  declared by the <code>Composable</code>
       *   contract; not thrown by this implementation
       */
      public void compose(ComponentManager manager) throws ComponentException {
          this.manager = manager;
      }
  
  
      /**
       * Disposes this component. This implementation does nothing.
       */
      public void dispose() { }
  
  
      /**
       * Index the content of base_url, and the content of the links crawled
       * from base_url.
       * <p>
       *   URLs delivered by the crawler whose host or port differ from those of
       *   base_url are skipped. For every remaining URL the lucene documents
       *   built by the <code>LuceneXMLIndexer</code> are added to the index,
       *   which is optimized at the end.
       * </p>
       *
       * @param  index                    the lucene store to write the index to
       * @param  create                   iff true create, or overwrite existing index, else
       *   update existing index.
       * @param  base_url                 index content of base_url, and crawl through all its
       *   links recursively.
       * @exception  ProcessingException  wraps an <code>IOException</code> raised
       *   while writing the index, or a <code>ComponentException</code> raised
       *   while looking up the indexer or crawler component
       */
      public void index(Directory index, boolean create, URL base_url)
               throws ProcessingException {

          IndexWriter writer = null;
          LuceneXMLIndexer lxi = null;
          CocoonCrawler cocoonCrawler = null;

          try {
              lxi = (LuceneXMLIndexer) manager.lookup(LuceneXMLIndexer.ROLE);

              writer = new IndexWriter(index, analyzer, create);
              writer.mergeFactor = this.mergeFactor;

              cocoonCrawler = (CocoonCrawler) manager.lookup(CocoonCrawler.ROLE);
              cocoonCrawler.crawl(base_url);

              Iterator cocoonCrawlerIterator = cocoonCrawler.iterator();
              while (cocoonCrawlerIterator.hasNext()) {
                  URL crawl_url = (URL) cocoonCrawlerIterator.next();

                  if (!crawl_url.getHost().equals(base_url.getHost()) ||
                          crawl_url.getPort() != base_url.getPort()) {

                      // skip urls using different host, or port than host,
                      // or port of base url
                      if (getLogger().isDebugEnabled()) {
                          getLogger().debug("Skipping crawling URL " + crawl_url.toString() +
                              " as base_url is " + base_url.toString());
                      }
                      continue;
                  }

                  // build lucene documents from the content of the crawl_url
                  Iterator i = lxi.build(crawl_url).iterator();

                  // add all built lucene documents
                  while (i.hasNext()) {
                      writer.addDocument((Document) i.next());
                  }
              }
              // optimize it
              writer.optimize();
          } catch (IOException ioe) {
              throw new ProcessingException("IOException in index()", ioe);
          } catch (ComponentException ce) {
              throw new ProcessingException("ComponentException in index()", ce);
          } finally {
              if (writer != null) {
                  try {
                      writer.close();
                  } catch (IOException ioe) {
                      // closing stays best effort so that an exception thrown
                      // above is not masked, but the failure is reported
                      // instead of being dropped silently
                      getLogger().warn("Cannot close index writer in index()", ioe);
                  }
              }

              if (lxi != null) {
                  manager.release(lxi);
              }
              if (cocoonCrawler != null) {
                  manager.release(cocoonCrawler);
              }
          }
      }
  
  
      /**
       * Walks the terms of the <code>uid</code> field of an existing index and
       * deletes documents through the underlying <code>IndexReader</code>.
       * <p>
       *   The term enumeration is positioned on the first <code>uid</code> term
       *   by the constructor and only ever moves forward.
       * </p>
       *
       * @author     huberb1
       * @version
       */
      class DocumentDeletableIterator
      {
          /** Name of the field whose terms are enumerated. */
          private static final String UID_FIELD = "uid";

          // reader of the existing index
          private IndexReader reader;
          // enumeration of the uid terms of that index
          private TermEnum uidIter;


          /**
           * Opens the existing index and positions the term enumeration at the
           * first term of the <code>uid</code> field.
           *
           * @param  directory        the lucene directory holding the existing index
           * @exception  IOException  if the index or its terms cannot be opened
           */
          public DocumentDeletableIterator(Directory directory) throws IOException {
              reader = IndexReader.open(directory);
              try {
                  uidIter = reader.terms(new Term(UID_FIELD, ""));
              } catch (IOException ioe) {
                  // do not leave the reader open when the enumeration cannot be created
                  try {
                      reader.close();
                  } catch (IOException ignored) {
                      // the original failure is the one worth reporting
                  }
                  reader = null;
                  throw ioe;
              }
          }


          /**
           * Deletes the documents of every remaining <code>uid</code> term, from
           * the current position of the enumeration onwards.
           *
           * @exception  IOException  if deleting or advancing the enumeration fails
           */
          public void deleteAllStaleDocuments() throws IOException {
              while (documentIsDeletable(uidIter.term())) {
                  reader.delete(uidIter.term());
                  uidIter.next();
              }
          }


          /**
           * Deletes the documents of every <code>uid</code> term that sorts before
           * the given uid, then steps over the term equal to uid, if present,
           * without deleting it.
           *
           * @param  uid              the uid up to which preceding terms are deleted
           * @exception  IOException  if deleting or advancing the enumeration fails
           */
          public void deleteModifiedDocuments(String uid) throws IOException {
              while (documentHasBeenModified(uidIter.term(), uid)) {
                  reader.delete(uidIter.term());
                  uidIter.next();
              }
              if (documentHasNotBeenModified(uidIter.term(), uid)) {
                  uidIter.next();
              }
          }


          /**
           * Closes the term enumeration and the index reader.
           * <p>
           *   The reader is closed even if closing the enumeration fails.
           * </p>
           *
           * @exception  Throwable  if closing a resource fails
           */
          protected void finalize() throws Throwable {
              try {
                  if (uidIter != null) {
                      uidIter.close();
                      uidIter = null;
                  }
              } finally {
                  try {
                      if (reader != null) {
                          reader.close();
                          reader = null;
                      }
                  } finally {
                      super.finalize();
                  }
              }
          }


          /**
           * Tells whether the term belongs to the <code>uid</code> field.
           *
           * @param  term  the current term of the enumeration, may be null
           * @return       true iff term is not null and its field is <code>uid</code>
           */
          boolean documentIsDeletable(Term term) {
              // equals() instead of ==, so the check does not rely on string interning
              return term != null && UID_FIELD.equals(term.field());
          }


          /**
           * Tells whether the term is a <code>uid</code> term sorting before uid.
           *
           * @param  term  the current term of the enumeration, may be null
           * @param  uid   the uid to compare the term text with
           * @return       true iff term is a uid term whose text is lexicographically
           *   less than uid
           */
          boolean documentHasBeenModified(Term term, String uid) {
              return documentIsDeletable(term) &&
                      term.text().compareTo(uid) < 0;
          }


          /**
           * Tells whether the term is the <code>uid</code> term equal to uid.
           *
           * @param  term  the current term of the enumeration, may be null
           * @param  uid   the uid to compare the term text with
           * @return       true iff term is a uid term whose text equals uid
           */
          boolean documentHasNotBeenModified(Term term, String uid) {
              return documentIsDeletable(term) &&
                      term.text().compareTo(uid) == 0;
          }
      }
  }
  
  
  
  
  1.1                  xml-cocoon2/src/blocks/lucene/java/org/apache/cocoon/components/search/SimpleLuceneCocoonSearcherImpl.java
  
  Index: SimpleLuceneCocoonSearcherImpl.java
  ===================================================================
  /*
  
   ============================================================================
                     The Apache Software License, Version 1.1
   ============================================================================
  
   Copyright (C) 1999-2002 The Apache Software Foundation. All rights reserved.
  
   Redistribution and use in source and binary forms, with or without modifica-
   tion, are permitted provided that the following conditions are met:
  
   1. Redistributions of  source code must  retain the above copyright  notice,
      this list of conditions and the following disclaimer.
  
   2. Redistributions in binary form must reproduce the above copyright notice,
      this list of conditions and the following disclaimer in the documentation
      and/or other materials provided with the distribution.
  
   3. The end-user documentation included with the redistribution, if any, must
      include  the following  acknowledgment:  "This product includes  software
      developed  by the  Apache Software Foundation  (http://www.apache.org/)."
      Alternately, this  acknowledgment may  appear in the software itself,  if
      and wherever such third-party acknowledgments normally appear.
  
   4. The names "Apache Cocoon" and  "Apache Software Foundation" must  not  be
      used to  endorse or promote  products derived from  this software without
      prior written permission. For written permission, please contact
      apache@apache.org.
  
   5. Products  derived from this software may not  be called "Apache", nor may
      "Apache" appear  in their name,  without prior written permission  of the
      Apache Software Foundation.
  
   THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
   INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
   FITNESS  FOR A PARTICULAR  PURPOSE ARE  DISCLAIMED.  IN NO  EVENT SHALL  THE
   APACHE SOFTWARE  FOUNDATION  OR ITS CONTRIBUTORS  BE LIABLE FOR  ANY DIRECT,
   INDIRECT, INCIDENTAL, SPECIAL,  EXEMPLARY, OR CONSEQUENTIAL  DAMAGES (INCLU-
   DING, BUT NOT LIMITED TO, PROCUREMENT  OF SUBSTITUTE GOODS OR SERVICES; LOSS
   OF USE, DATA, OR  PROFITS; OR BUSINESS  INTERRUPTION)  HOWEVER CAUSED AND ON
   ANY  THEORY OF LIABILITY,  WHETHER  IN CONTRACT,  STRICT LIABILITY,  OR TORT
   (INCLUDING  NEGLIGENCE OR  OTHERWISE) ARISING IN  ANY WAY OUT OF THE  USE OF
   THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  
   This software  consists of voluntary contributions made  by many individuals
   on  behalf of the Apache Software  Foundation and was  originally created by
   Stefano Mazzocchi  <st...@apache.org>. For more  information on the Apache
   Software Foundation, please see <http://www.apache.org/>.
  
  */
  package org.apache.cocoon.components.search;
  import org.apache.avalon.excalibur.pool.Recyclable;
  import org.apache.avalon.framework.activity.Disposable;
  import org.apache.avalon.framework.component.ComponentException;
  import org.apache.avalon.framework.component.ComponentManager;
  import org.apache.avalon.framework.component.Composable;
  import org.apache.avalon.framework.configuration.Configurable;
  import org.apache.avalon.framework.configuration.Configuration;
  import org.apache.avalon.framework.configuration.ConfigurationException;
  import org.apache.avalon.framework.logger.AbstractLogEnabled;
  import org.apache.cocoon.ProcessingException;
  import org.apache.cocoon.util.ClassUtils;
  import org.apache.lucene.analysis.Analyzer;
  import org.apache.lucene.index.IndexReader;
  import org.apache.lucene.queryParser.ParseException;
  import org.apache.lucene.queryParser.QueryParser;
  import org.apache.lucene.search.Hits;
  import org.apache.lucene.search.IndexSearcher;
  import org.apache.lucene.search.Query;
  import org.apache.lucene.store.Directory;
  import org.apache.lucene.store.FSDirectory;
  
  import java.io.File;
  import java.io.IOException;
  
  /**
   * This class provides searching via lucene.
   *
   * <p>
   *   In order to do searching you need a lucene Directory where the lucene generated
   *   index resides.
   *   Moreover you must know the lucene Analyzer which has been used for
   *   indexing, and which will be used for searching.
   * </p>
   * <p>
   *   Knowing this you may start searching, having a query which is parsable
   *   by a QueryParser, and having the name of the default field to use in
   *   searching.
   * </p>
   * <p>
   *   This class returns a Hits object as its search result.
   * </p>
   *
   * @author     <a href="mailto:berni_huber@a1.net">Bernhard Huber</a>
   * @version CVS $Id: SimpleLuceneCocoonSearcherImpl.java,v 1.1 2002/12/18 23:44:51 nicolaken Exp $
   */
  public class SimpleLuceneCocoonSearcherImpl extends AbstractLogEnabled
           implements LuceneCocoonSearcher, Configurable, Composable, Disposable, Recyclable
  {

      /**
       * Configuration element name of lucene's Analyzer class.
       * <p>
       *   Its value is
       *   <code>analyzer-classname</code>.
       * </p>
       */
      protected final static String ANALYZER_CLASSNAME_CONFIG = "analyzer-classname";
      /**
       * Configuration element default value of lucene's Analyzer class.
       * <p>
       *   Its value is,
       *   <code>org.apache.lucene.analysis.standard.StandardAnalyzer</code>.
       * </p>
       */
      protected final static String ANALYZER_CLASSNAME_DEFAULT = "org.apache.lucene.analysis.standard.StandardAnalyzer";

      /**
       * Configuration element name of default search field.
       * <p>
       *   Its value is
       *   <code>default-search-field</code>.
       * </p>
       */
      protected final static String DEFAULT_SEARCH_FIELD_CONFIG = "default-search-field";
      /**
       * Configuration element default value of lucene's default search field.
       * <p>
       *   Its value is <code>body</code>.
       * </p>
       */
      protected final static String DEFAULT_SEARCH_FIELD_DEFAULT = "body";

      /**
       * Configuration element name of default-query.
       * <p>
       *   Its value is
       *   <code>default-query</code>.
       * </p>
       */
      protected final static String DEFAULT_QUERY_CONFIG = "default-query";
      /**
       * Configuration element default value of default-query.
       * <p>
       *   Its value is <code>null</code>.
       * </p>
       */
      protected final static String DEFAULT_QUERY_DEFAULT = null;

      /**
       * Configuration element name of query parser class name.
       * <p>
       *   Its value is
       *   <code>queryparser-classname</code>.
       * </p>
       */
      protected final static String QUERYPARSER_CLASSNAME_CONFIG = "queryparser-classname";
      /**
       * Configuration element default value of queryparser-classname.
       * <p>
       *   Its value is
       *   <code>org.apache.lucene.queryParser.QueryParser</code>.
       * </p>
       */
      protected final static String QUERYPARSER_CLASSNAME_DEFAULT = "org.apache.lucene.queryParser.QueryParser";

      /**
       * Configuration element name of lucene's default filesystem default
       * directory.
       * <p>
       *   Its value is <code>directory</code>.
       * </p>
       */
      protected final static String DIRECTORY_CONFIG = "directory";
      /**
       * Configuration element default value of filesystem default directory.
       * <p>
       *   Its value is <code>null</code>.
       * </p>
       */
      protected final static String DIRECTORY_DEFAULT = null;

      /**
       * The component manager instance, assigned in <code>compose()</code>.
       */
      protected ComponentManager manager = null;

      // current settings; each starts with its default and may be replaced by configure()
      private String analyzerClassnameDefault = ANALYZER_CLASSNAME_DEFAULT;
      private String defaultSearchFieldDefault = DEFAULT_SEARCH_FIELD_DEFAULT;
      private String defaultQueryDefault = DEFAULT_QUERY_DEFAULT;
      private String queryparserClassnameDefault = QUERYPARSER_CLASSNAME_DEFAULT;
      private String directoryDefault = DIRECTORY_DEFAULT;

      /**
       * The lucene analyzer used for searching
       */
      private Analyzer analyzer;
      /**
       * The lucene directory used for searching
       */
      private Directory directory;
      /**
       * The lucene index searcher closed by <code>releaseIndexSearcher()</code>.
       * NOTE(review): no method of this class assigns this field;
       * <code>search()</code> keeps its searcher in a local variable.
       */
      private IndexSearcher indexSearcher;

      /**
       * A lucene index reader cache to maximize sharing of
       * lucene index readers
       */
      private IndexReaderCache indexReaderCache;

      /**
       * Set an analyzer, overriding the one created from the configured
       * analyzer class name.
       *
       * @param  analyzer  The new analyzer value
       */
      public void setAnalyzer(Analyzer analyzer) {
          this.analyzer = analyzer;
      }


      /**
       * Sets the lucene directory to search in.
       * <p>
       *   An existing index reader cache is closed and discarded, so that the
       *   next <code>getReader()</code> opens a reader on the new directory.
       * </p>
       *
       * @param  directory  The new directory value
       */
      public void setDirectory(Directory directory) {
          this.directory = directory;
          if (indexReaderCache != null) {
              indexReaderCache.close();
              indexReaderCache = null;
          }
      }


      /**
       * Get an IndexReader.
       * <p>
       *   As an IndexReader might be cached, it is checked whether the cached
       *   indexReader is still valid; the cache is created on first use.
       * </p>
       *
       * @return                  IndexReader an up to date indexReader
       * @exception  IOException  if the index cannot be opened or inspected
       */
      public IndexReader getReader() throws IOException {
          if (indexReaderCache == null) {
              indexReaderCache = new IndexReaderCache();
          }
          return indexReaderCache.getIndexReader(directory);
      }


      /**
       * Configures this searcher from its Avalon configuration.
       * <p>
       *   The optional child elements <code>analyzer-classname</code>,
       *   <code>default-search-field</code>, <code>default-query</code>,
       *   <code>queryparser-classname</code> and <code>directory</code> are
       *   evaluated. A configured analyzer class is instantiated immediately, a
       *   configured directory is opened as filesystem directory immediately.
       * </p>
       *
       * @param  conf                        the configuration of this component
       * @exception  ConfigurationException  if the analyzer cannot be instantiated,
       *   or the index directory cannot be opened
       */
      public void configure(Configuration conf) throws ConfigurationException {
          String value;

          value = getChildValue(conf, ANALYZER_CLASSNAME_CONFIG, ANALYZER_CLASSNAME_DEFAULT);
          if (value != null) {
              analyzerClassnameDefault = value;
              try {
                  analyzer = (Analyzer) ClassUtils.newInstance(analyzerClassnameDefault);
              } catch (Exception e) {
                  throw new ConfigurationException("Cannot create analyzer of class " +
                          analyzerClassnameDefault, e);
              }
          }

          value = getChildValue(conf, DEFAULT_SEARCH_FIELD_CONFIG, DEFAULT_SEARCH_FIELD_DEFAULT);
          if (value != null) {
              defaultSearchFieldDefault = value;
          }

          value = getChildValue(conf, DEFAULT_QUERY_CONFIG, DEFAULT_QUERY_DEFAULT);
          if (value != null) {
              defaultQueryDefault = value;
          }

          value = getChildValue(conf, QUERYPARSER_CLASSNAME_CONFIG, QUERYPARSER_CLASSNAME_DEFAULT);
          if (value != null) {
              queryparserClassnameDefault = value;
          }

          value = getChildValue(conf, DIRECTORY_CONFIG, DIRECTORY_DEFAULT);
          if (value != null) {
              directoryDefault = value;
              try {
                  setDirectory(FSDirectory.getDirectory(new File(directoryDefault), false));
              } catch (IOException ioe) {
                  throw new ConfigurationException("Cannot set index directory " + directoryDefault, ioe);
              }
          }
      }


      /**
       * Reads the value of an optional child element.
       * <p>
       *   The value is taken from the child element itself, not from the
       *   enclosing configuration element.
       * </p>
       *
       * @param  conf          the enclosing configuration element
       * @param  name          the name of the child element
       * @param  defaultValue  the value to use if the child element has no value
       * @return               null if there is no such child element, else its
       *   value, or defaultValue if it has none
       */
      private static String getChildValue(Configuration conf, String name, String defaultValue) {
          Configuration child = conf.getChild(name, false);
          if (child == null) {
              return null;
          }
          return child.getValue(defaultValue);
      }


      /**
       * Set the current <code>ComponentManager</code> instance used by this
       * <code>Composable</code>.
       *
       * @param  manager                 the component manager to remember
       * @exception  ComponentException  declared by the <code>Composable</code>
       *   contract; not thrown by this implementation
       */
      public void compose(ComponentManager manager) throws ComponentException {
          this.manager = manager;
      }


      /**
       * Releases the index searcher and drops the index reader cache.
       */
      public void dispose() {
          releaseIndexSearcher();
          releaseIndexReaderCache();
      }


      /**
       * Releases the index searcher and drops the index reader cache, same as
       * <code>dispose()</code>.
       */
      public void recycle() {
          releaseIndexSearcher();
          releaseIndexReaderCache();
      }


      /**
       * Parses the query and searches the index.
       *
       * @param  query_string             the query to parse; if null the configured
       *   default query is used
       * @param  default_field            the field searched by query terms naming no
       *   field; if null the configured default search field is used
       * @return                          the hits of the search
       * @exception  ProcessingException  if the query cannot be parsed, or the
       *   index cannot be accessed
       */
      public Hits search(String query_string, String default_field) throws ProcessingException {
          Hits hits = null;

          if (query_string == null) {
              query_string = defaultQueryDefault;
          }
          if (default_field == null) {
              default_field = defaultSearchFieldDefault;
          }

          try {
              Query query = QueryParser.parse(query_string, default_field, analyzer);

              // release index searcher for each new search
              releaseIndexSearcher();

              // NOTE(review): the searcher is held in a local variable only, never in
              // the indexSearcher field, so releaseIndexSearcher() does not close it.
              // Storing it in the field would make a later release close it, which
              // may also affect the shared cached reader and hits still in use -
              // confirm against the lucene version in use before changing this.
              IndexSearcher searcher = new IndexSearcher(getReader());
              hits = searcher.search(query);
              // do not close the searcher now, as using hits needs an
              // opened searcher
          } catch (ParseException pe) {
              throw new ProcessingException("Cannot parse query " + query_string, pe);
          } catch (IOException ioe) {
              throw new ProcessingException("Cannot access hits", ioe);
          }
          return hits;
      }


      /**
       * Closes the index searcher held in the field, if any, ignoring close
       * failures, and clears the field.
       */
      private void releaseIndexSearcher() {
          if (indexSearcher != null) {
              try {
                  indexSearcher.close();
              } catch (IOException ioe) {
                  // ignore it
              }
              indexSearcher = null;
          }
      }


      /**
       * Drops the reference to the index reader cache.
       * <p>
       *   The cache is not closed here; its reader is closed by the cache's
       *   <code>finalize()</code>.
       *   NOTE(review): presumably so that hits handed out earlier stay usable -
       *   confirm before closing eagerly.
       * </p>
       */
      private void releaseIndexReaderCache() {
          if (indexReaderCache != null) {
              indexReaderCache = null;
          }
      }


      /**
       * This class should help to minimise usage of IndexReaders.
       * <p>
       *   It keeps one IndexReader together with the lastModified value of the
       *   index at the time the reader was opened.
       * </p>
       *
       * @author     huberb1
       * @version
       */
      static class IndexReaderCache
      {
          private IndexReader indexReader;
          private long lastModified;


          /**
           * Create an empty IndexReaderCache.
           */
          IndexReaderCache() { }


          /**
           * Return the cached IndexReader object, opening a new one if none is
           * cached, or if the cached one is no longer valid.
           *
           * @param  directory        lucene index directory
           * @return                  The indexReader value
           * @exception  IOException  if the index cannot be opened or inspected
           */
          public IndexReader getIndexReader(Directory directory) throws IOException {
              if (indexReader == null) {
                  createIndexReader(directory);
              } else {
                  if (!indexReaderIsValid(directory)) {
                      createIndexReader(directory);
                  }
              }
              return indexReader;
          }


          /**
           * Close an opened lucene IndexReader, ignoring close failures.
           */
          public void close() {
              if (indexReader != null) {
                  try {
                      indexReader.close();
                  } catch (IOException ioe) {
                      // ignore it
                  }
                  indexReader = null;
              }
          }


          /**
           * Check if cached IndexReader is up to date.
           *
           * @param  directory        lucene index directory
           * @return                  boolean  return true if there is a cached IndexReader object,
           *   and the lastModified value recorded when it was opened equals the
           *   current lastModified value of its lucene Directory.
           * @exception  IOException  if the lastModified value cannot be read
           */
          public boolean indexReaderIsValid(Directory directory) throws IOException {
              return indexReader != null &&
                      indexReader.lastModified(directory) == lastModified;
          }


          /**
           *  Release all resources, most notably the lucene IndexReader.
           *
           * @exception  Throwable  declared by <code>Object.finalize()</code>
           */
          protected void finalize() throws Throwable {
              try {
                  close();
              } finally {
                  super.finalize();
              }
          }


          /**
           * Create unconditionally a lucene IndexReader, closing a previously
           * cached one first, and record the lastModified value of the index.
           *
           * @param  directory        lucene index directory
           * @exception  IOException  if the index cannot be opened
           */
          private void createIndexReader(Directory directory) throws IOException {
              close();
              indexReader = IndexReader.open(directory);
              lastModified = indexReader.lastModified(directory);
          }
      }
  }
  
  
  
  
  1.1                  xml-cocoon2/src/blocks/lucene/java/org/apache/cocoon/components/search/SimpleLuceneXMLIndexerImpl.java
  
  Index: SimpleLuceneXMLIndexerImpl.java
  ===================================================================
  /*
  
   ============================================================================
                     The Apache Software License, Version 1.1
   ============================================================================
  
   Copyright (C) 1999-2002 The Apache Software Foundation. All rights reserved.
  
   Redistribution and use in source and binary forms, with or without modifica-
   tion, are permitted provided that the following conditions are met:
  
   1. Redistributions of  source code must  retain the above copyright  notice,
      this list of conditions and the following disclaimer.
  
   2. Redistributions in binary form must reproduce the above copyright notice,
      this list of conditions and the following disclaimer in the documentation
      and/or other materials provided with the distribution.
  
   3. The end-user documentation included with the redistribution, if any, must
      include  the following  acknowledgment:  "This product includes  software
      developed  by the  Apache Software Foundation  (http://www.apache.org/)."
      Alternately, this  acknowledgment may  appear in the software itself,  if
      and wherever such third-party acknowledgments normally appear.
  
   4. The names "Apache Cocoon" and  "Apache Software Foundation" must  not  be
      used to  endorse or promote  products derived from  this software without
      prior written permission. For written permission, please contact
      apache@apache.org.
  
   5. Products  derived from this software may not  be called "Apache", nor may
      "Apache" appear  in their name,  without prior written permission  of the
      Apache Software Foundation.
  
   THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
   INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
   FITNESS  FOR A PARTICULAR  PURPOSE ARE  DISCLAIMED.  IN NO  EVENT SHALL  THE
   APACHE SOFTWARE  FOUNDATION  OR ITS CONTRIBUTORS  BE LIABLE FOR  ANY DIRECT,
   INDIRECT, INCIDENTAL, SPECIAL,  EXEMPLARY, OR CONSEQUENTIAL  DAMAGES (INCLU-
   DING, BUT NOT LIMITED TO, PROCUREMENT  OF SUBSTITUTE GOODS OR SERVICES; LOSS
   OF USE, DATA, OR  PROFITS; OR BUSINESS  INTERRUPTION)  HOWEVER CAUSED AND ON
   ANY  THEORY OF LIABILITY,  WHETHER  IN CONTRACT,  STRICT LIABILITY,  OR TORT
   (INCLUDING  NEGLIGENCE OR  OTHERWISE) ARISING IN  ANY WAY OUT OF THE  USE OF
   THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  
   This software  consists of voluntary contributions made  by many individuals
   on  behalf of the Apache Software  Foundation and was  originally created by
   Stefano Mazzocchi  <st...@apache.org>. For more  information on the Apache
   Software Foundation, please see <http://www.apache.org/>.
  
  */
  package org.apache.cocoon.components.search;
  
  import org.apache.avalon.excalibur.xml.Parser;
  import org.apache.avalon.framework.component.ComponentException;
  import org.apache.avalon.framework.component.ComponentManager;
  import org.apache.avalon.framework.component.Composable;
  import org.apache.avalon.framework.configuration.Configurable;
  import org.apache.avalon.framework.configuration.Configuration;
  import org.apache.avalon.framework.configuration.ConfigurationException;
  import org.apache.avalon.framework.logger.AbstractLogEnabled;
  import org.apache.avalon.framework.thread.ThreadSafe;
  import org.apache.cocoon.ProcessingException;
  import org.apache.cocoon.util.Tokenizer;
  import org.apache.lucene.document.DateField;
  import org.apache.lucene.document.Document;
  import org.apache.lucene.document.Field;
  import org.xml.sax.InputSource;
  import org.xml.sax.SAXException;
  
  import java.io.IOException;
  import java.io.InputStream;
  import java.net.URL;
  import java.net.URLConnection;
  import java.util.Collections;
  import java.util.HashSet;
  import java.util.Iterator;
  import java.util.List;
  
  
  /**
   * A simple class building lucene documents from xml content.
   *
   *	<p>
   *    It has two parameters that affect the way it works:
   *	</p><p>
   *	  <tt>&lt;store-fields/&gt;</tt> 
   *		Sets which tags in your content are stored in Lucene as fields, 
   *		during the indexing process. Allows them to be output with search hits.
   *	</p><p>		
   *		<tt>&lt;content-view-query/&gt;</tt>
   *		Sets the view the indexer will request for indexing content.
   *	</p><p>
   *	Example configuration (goes in cocoon.xconf)
   *	<pre><tt>
   *		&lt;lucene-xml-indexer logger="core.search.lucene"&gt;
   *			&lt;store-fields&gt;title, summary&lt;/store-fields&gt;
   *			&lt;content-view-query&gt;cocoon-view=search&lt;/content-view-query&gt;
   *		&lt;/lucene-xml-indexer&gt;
   *	</tt></pre></p>
   *
   * @author     <a href="mailto:berni_huber@a1.net">Bernhard Huber</a>
   * @author     <a href="mailto:jeremy@apache.org">Jeremy Quinn</a>
   * @version CVS $Id: SimpleLuceneXMLIndexerImpl.java,v 1.1 2002/12/18 23:44:51 nicolaken Exp $
   */
  public class SimpleLuceneXMLIndexerImpl extends AbstractLogEnabled
           implements LuceneXMLIndexer, Configurable, Composable, ThreadSafe {
  
      /**
       * The component manager instance
       */
      protected ComponentManager manager = null;
  
      /**
       * Config element name specifying the query string appended to a URL for
       * requesting the content view of that URL.
       * <p>
       *  Its value is <code>content-view-query</code>.
       * </p>
       */
      public final static String CONTENT_VIEW_QUERY_CONFIG = "content-view-query";
  
      /**
       * Append this string to the url in order to get the
       * content view of the url; used when the
       * <code>content-view-query</code> element has no value.
       * <p>
       *  Its value is <code>cocoon-view=content</code>.
       * </p>
       */
      
      final String CONTENT_VIEW_QUERY_DEFAULT = "cocoon-view=content";
  
      /**
       * Config element name specifying the tags to be added as Stored, Untokenised, Unindexed Fields.
       * <p>
       *  Its value is <code>store-fields</code>.
       * </p>
       */
      public final static String FIELDTAGS_CONFIG = "store-fields";
  
      /**
       * Set of allowed content types; filled by the constructor.
       */
      final HashSet allowedContentType;
  
  
      /**
       * Query-string appended to a URL when its content view is requested.
       * Starts out as the default and may be replaced by <code>configure</code>.
       */
      private String contentViewQuery = CONTENT_VIEW_QUERY_DEFAULT;

      /**
       * Names of the tags handed to the <code>LuceneIndexContentHandler</code>
       * as field tags. Empty until <code>configure</code> finds
       * <code>store-fields</code> elements.
       */
      private HashSet fieldTags;


      /**
       * Creates an indexer accepting the content types <code>text/xml</code>
       * and <code>text/xhtml</code>, with no field tags.
       */
      public SimpleLuceneXMLIndexerImpl() {
          final HashSet indexableTypes = new HashSet();
          indexableTypes.add("text/xml");
          indexableTypes.add("text/xhtml");
          this.allowedContentType = indexableTypes;

          this.fieldTags = new HashSet();
      }
  
  
      /**
       * Reads the settings of this indexer from its configuration.
       * <p>
       *  The value of every <code>store-fields</code> child element is split by
       *  a <code>Tokenizer</code> using the delimiter string <code>", "</code>;
       *  each non-empty token is added to a fresh set of field tags. Without
       *  such child elements the current field tags are left untouched.
       * </p>
       * <p>
       *  The <code>content-view-query</code> child element supplies the
       *  query-string used for requesting the content view; if it has no value
       *  the default <code>cocoon-view=content</code> is used.
       * </p>
       *
       * @param  configuration               the configuration of this component
       * @exception  ConfigurationException  propagated from reading the
       *   configuration values
       */
      public void configure(Configuration configuration) throws ConfigurationException {

          final Configuration[] storeFields = configuration.getChildren(FIELDTAGS_CONFIG);
          final boolean hasStoreFields = (storeFields != null) && (storeFields.length > 0);

          if (!hasStoreFields) {
              if (getLogger().isDebugEnabled()) {
                  getLogger().debug("Do not add any fields");
              }
          } else {
              // start from an empty set, dropping whatever was there before
              fieldTags = new HashSet();
              for (int n = 0; n < storeFields.length; n++) {
                  final Tokenizer names = new Tokenizer(storeFields[n].getValue(), ", ");
                  while (names.hasMoreTokens()) {
                      final String tagName = names.nextToken();
                      if (tagName.equals("")) {
                          continue;
                      }
                      this.fieldTags.add(tagName);
                      if (getLogger().isDebugEnabled()) {
                          getLogger().debug("add field: " + tagName);
                      }
                  }
              }
          }

          final Configuration viewQuery = configuration.getChild(CONTENT_VIEW_QUERY_CONFIG, true);
          this.contentViewQuery = viewQuery.getValue(CONTENT_VIEW_QUERY_DEFAULT);
          if (getLogger().isDebugEnabled()) {
              getLogger().debug("content view: " + this.contentViewQuery);
          }
      }
  
  
      /**
       * Set the current <code>ComponentManager</code> instance used by this
       * <code>Composable</code>.
       * <p>
       *  The manager is only stored here; it is used later to look up the XML
       *  parser when a document is indexed.
       * </p>
       *
       * @param  manager                 the component manager to store
       * @exception  ComponentException  declared by the interface; not thrown
       *   by this implementation
       */
      public void compose(ComponentManager manager) throws ComponentException {
          this.manager = manager;
      }
  
  
      /**
       * Build Lucene documents from a URL.
       * <p>
       *  The content view query-string is appended to the URL and the resulting
       *  resource is requested. If its content type is one of the allowed
       *  content types the content is parsed into Lucene documents, and each
       *  document additionally receives the <code>URL_FIELD</code> (the original
       *  url) and the <code>UID_FIELD</code>.
       * </p>
       * <p>
       *  Only the media type part of the content type is considered: parameters
       *  following a <code>;</code> are dropped, surrounding white space is
       *  removed and the comparison is case-insensitive, as media types are
       *  case-insensitive.
       * </p>
       *
       * @param  url                      the content of this url gets indexed.
       * @return                          the documents built from the content,
       *   or an empty list if the resource reports no content type or a content
       *   type which is not allowed
       * @exception  ProcessingException  if the connection cannot be opened, or
       *   the content cannot be read or parsed
       */
      public List build(URL url)
               throws ProcessingException {

          try {
              URL contentURL = new URL(url, url.getFile()
                  + ((url.getFile().indexOf("?") == -1) ? "?" : "&")
                  + contentViewQuery);
              URLConnection contentURLConnection = contentURL.openConnection();
              if (contentURLConnection == null) {
                  throw new ProcessingException("Can not open connection to URL "
                          + contentURL + " (null connection)");
              }

              String contentType = contentURLConnection.getContentType();
              if (contentType == null) {
                  if (getLogger().isDebugEnabled()) {
                      getLogger().debug("Ignoring " + contentURL + " (no content type)");
                  }

                  return Collections.EMPTY_LIST;
              }

              // drop parameters like "; charset=UTF-8"
              int index = contentType.indexOf(';');
              if (index != -1) {
                  contentType = contentType.substring(0, index);
              }
              // Normalize the media type: "text/xml ;charset=..." leaves a trailing
              // blank, and servers may answer "Text/XML". A fixed locale keeps the
              // lower-casing independent of the platform default locale.
              contentType = contentType.trim().toLowerCase(java.util.Locale.ENGLISH);

              if (!allowedContentType.contains(contentType)) {
                  if (getLogger().isDebugEnabled()) {
                      getLogger().debug("Ignoring " + contentURL + " (" + contentType + ")");
                  }

                  return Collections.EMPTY_LIST;
              }

              if (getLogger().isDebugEnabled()) {
                  getLogger().debug("Indexing " + contentURL + " (" + contentType + ")");
              }

              LuceneIndexContentHandler luceneIndexContentHandler = new LuceneIndexContentHandler();
              luceneIndexContentHandler.setFieldTags(fieldTags);
              indexDocument(contentURLConnection, luceneIndexContentHandler);
              //
              // document is parsed
              //
              // Both values are the same for every document of this resource,
              // so they are computed once instead of once per document.
              final String urlValue = url.toString();
              final String uidValue = uid(contentURLConnection);

              Iterator it = luceneIndexContentHandler.iterator();
              while (it.hasNext()) {
                  Document d = (Document) it.next();
                  d.add(Field.UnIndexed(URL_FIELD, urlValue));
                  // store ... false, index ... true, token ... false
                  d.add(new Field(UID_FIELD, uidValue, false, true, false));
              }

              return luceneIndexContentHandler.allDocuments();
          } catch (IOException ioe) {
              throw new ProcessingException("Cannot read URL " + url, ioe);
          }
      }
  
  
      /**
       * Index an input stream, producing lucene Documents.
       * <p>
       *  Opens the input stream of the connection, looks up an XML parser from
       *  the component manager and parses the stream into the given content
       *  handler. The parser is released and the input stream is closed in
       *  every case, whether parsing succeeded or not.
       * </p>
       *
       * @param  contentURLConnection       the xml content which should get indexed.
       * @param  luceneIndexContentHandler  ContentHandler for generating
       *   a lucene Document from XML content.
       * @exception  ProcessingException    if the content cannot be read or
       *   parsed, or if the xml parser cannot be looked up
       */
      private void indexDocument(URLConnection contentURLConnection,
              LuceneIndexContentHandler luceneIndexContentHandler)
               throws ProcessingException {

          InputStream is = null;
          Parser parser = null;

          try {
              is = contentURLConnection.getInputStream();
              InputSource in = new InputSource(is);

              // get an XML parser
              parser = (Parser) this.manager.lookup(Parser.ROLE);
              parser.parse(in, luceneIndexContentHandler);
              //
              // document is parsed
              //
          } catch (IOException ioe) {
              throw new ProcessingException("Cannot read!", ioe);
          } catch (SAXException saxe) {
              throw new ProcessingException("Cannot parse!", saxe);
          } catch (ComponentException ce) {
              throw new ProcessingException("Cannot lookup xml parser!", ce);
          } finally {
              // The stream was opened here, so it is closed here; without this it
              // stays open whenever the parser lookup fails or the parser does
              // not close it itself.
              if (is != null) {
                  try {
                      is.close();
                  } catch (IOException ignored) {
                      // best effort only: a failing close must not hide the
                      // outcome of the parse
                      if (getLogger().isDebugEnabled()) {
                          getLogger().debug("Cannot close input stream of "
                                  + contentURLConnection.getURL(), ignored);
                      }
                  }
              }
              if (parser != null) {
                  this.manager.release(parser);
              }
          }
      }
  
  
      /**
       * Compute the uid stored with every document of a url connection.
       * <p>
       *  The uid is the string form of the connection with every <code>/</code>
       *  replaced by a null character, followed by a null character and the
       *  last-modified time of the connection encoded by
       *  <code>DateField.timeToString</code>.
       * </p>
       *
       * @param  urlConnection  the connection the uid is derived from
       * @return                String uid of the urlConnection
       */
      private String uid(URLConnection urlConnection) {
          // Path and date are combined such that lexicographic sorting gives the
          // same results as a walk of the file hierarchy: null (\u0000) separates
          // the directory components as well as the path from the date.
          // NOTE(review): the path part is urlConnection.toString(), not the bare
          // URL - confirm what that string contains before relying on its shape.
          final String flattenedPath = urlConnection.toString().replace('/', '\u0000');
          final String modified = DateField.timeToString(urlConnection.getLastModified());

          return flattenedPath + "\u0000" + modified;
      }
  }
  
  
  
  
  1.1                  xml-cocoon2/src/blocks/lucene/java/org/apache/cocoon/generation/SearchGenerator.java
  
  Index: SearchGenerator.java
  ===================================================================
  /*
  
   ============================================================================
                     The Apache Software License, Version 1.1
   ============================================================================
  
   Copyright (C) 1999-2002 The Apache Software Foundation. All rights reserved.
  
   Redistribution and use in source and binary forms, with or without modifica-
   tion, are permitted provided that the following conditions are met:
  
   1. Redistributions of  source code must  retain the above copyright  notice,
      this list of conditions and the following disclaimer.
  
   2. Redistributions in binary form must reproduce the above copyright notice,
      this list of conditions and the following disclaimer in the documentation
      and/or other materials provided with the distribution.
  
   3. The end-user documentation included with the redistribution, if any, must
      include  the following  acknowledgment:  "This product includes  software
      developed  by the  Apache Software Foundation  (http://www.apache.org/)."
      Alternately, this  acknowledgment may  appear in the software itself,  if
      and wherever such third-party acknowledgments normally appear.
  
   4. The names "Apache Cocoon" and  "Apache Software Foundation" must  not  be
      used to  endorse or promote  products derived from  this software without
      prior written permission. For written permission, please contact
      apache@apache.org.
  
   5. Products  derived from this software may not  be called "Apache", nor may
      "Apache" appear  in their name,  without prior written permission  of the
      Apache Software Foundation.
  
   THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
   INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
   FITNESS  FOR A PARTICULAR  PURPOSE ARE  DISCLAIMED.  IN NO  EVENT SHALL  THE
   APACHE SOFTWARE  FOUNDATION  OR ITS CONTRIBUTORS  BE LIABLE FOR  ANY DIRECT,
   INDIRECT, INCIDENTAL, SPECIAL,  EXEMPLARY, OR CONSEQUENTIAL  DAMAGES (INCLU-
   DING, BUT NOT LIMITED TO, PROCUREMENT  OF SUBSTITUTE GOODS OR SERVICES; LOSS
   OF USE, DATA, OR  PROFITS; OR BUSINESS  INTERRUPTION)  HOWEVER CAUSED AND ON
   ANY  THEORY OF LIABILITY,  WHETHER  IN CONTRACT,  STRICT LIABILITY,  OR TORT
   (INCLUDING  NEGLIGENCE OR  OTHERWISE) ARISING IN  ANY WAY OUT OF THE  USE OF
   THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  
   This software  consists of voluntary contributions made  by many individuals
   on  behalf of the Apache Software  Foundation and was  originally created by
   Stefano Mazzocchi  <st...@apache.org>. For more  information on the Apache
   Software Foundation, please see <http://www.apache.org/>.
  
  */
  package org.apache.cocoon.generation;
  
  import org.apache.avalon.framework.activity.Initializable;
  import org.apache.avalon.framework.activity.Disposable;
  import org.apache.avalon.framework.component.ComponentException;
  import org.apache.avalon.framework.component.ComponentManager;
  import org.apache.avalon.framework.context.Context;
  import org.apache.avalon.framework.context.ContextException;
  import org.apache.avalon.framework.context.Contextualizable;
  import org.apache.avalon.framework.parameters.Parameters;
  
  import org.apache.cocoon.Constants;
  import org.apache.cocoon.ProcessingException;
  import org.apache.cocoon.components.search.LuceneCocoonSearcher;
  import org.apache.cocoon.components.search.LuceneXMLIndexer;
  import org.apache.cocoon.components.search.LuceneCocoonPager;
  import org.apache.cocoon.components.search.LuceneCocoonHelper;
  import org.apache.cocoon.environment.ObjectModelHelper;
  import org.apache.cocoon.environment.Request;
  import org.apache.cocoon.environment.SourceResolver;
  
  import org.apache.lucene.analysis.Analyzer;
  import org.apache.lucene.document.Document;
  import org.apache.lucene.document.Field;
  import org.apache.lucene.search.Hits;
  import org.apache.lucene.store.Directory;
  
  import org.xml.sax.SAXException;
  import org.xml.sax.helpers.AttributesImpl;
  
  import java.io.File;
  import java.io.IOException;
  import java.util.Iterator;
  import java.util.List;
  import java.util.Map;
  import java.util.Enumeration;
  
  /**
   * Generates an XML representation of a search result.
   *
   * <p>
   *  This generator generates xml content representing an XML search.
   *  The generated xml content contains the search result,
   *  the search query information, and navigation information about the
   *  search results.
   *  The query is sent to the generator, either via the 'queryString' request parameter
   *  or the 'query' SiteMap parameter. The sitemap overrides the request.
   * </p>
   *
   * <p>
   *  Search xml sample generated by this generator:
   * </p>
   * <pre><tt>
   * &lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot;?&gt;
   *
   * &lt;search:results date=&quot;1008437081064&quot; query-string=&quot;cocoon&quot;
   *     start-index=&quot;0&quot; page-length=&quot;10&quot;
   *     xmlns:search=&quot;http://apache.org/cocoon/search/1.0&quot;
   *     xmlns:xlink=&quot;http://www.w3.org/1999/xlink&quot;&gt;
   *   &lt;search:hits total-count=&quot;125&quot; count-of-pages=&quot;13&quot;&gt;
   *     &lt;search:hit rank=&quot;0&quot; score=&quot;1.0&quot;
   *         uri=&quot;http://localhost:8080/cocoon/documents/hosting.html&quot;&gt;
   *       &lt;search:field name="title"&gt;Document Title&lt;/search:field&gt;
   *     &lt;/search:hit&gt;
   *     ...
   *   &lt;/search:hits&gt;
   *
   *   &lt;search:navigation total-count=&quot;125&quot; count-of-pages=&quot;13&quot;
   *       has-next=&quot;true&quot; has-previous=&quot;false&quot; next-index=&quot;10&quot; previous-index=&quot;0&quot;&gt;
   *     &lt;search:navigation-page start-index=&quot;0&quot;/&gt;
   *     &lt;search:navigation-page start-index=&quot;10&quot;/&gt;
   *     ...
   *     &lt;search:navigation-page start-index=&quot;120&quot;/&gt;
   *   &lt;/search:navigation&gt;
   * &lt;/search:results&gt;
   * </tt></pre>
   *
   * @author     <a href="mailto:berni_huber@a1.net">Bernhard Huber</a>
   * @author     <a href="mailto:vgritsenko@apache.org">Vadim Gritsenko</a>
   * @author     <a href="mailto:jeremy@apache.org">Jeremy Quinn</a>
   * @version    CVS $Id: SearchGenerator.java,v 1.1 2002/12/18 23:44:51 nicolaken Exp $
   */
  public class SearchGenerator extends ComposerGenerator
      implements Contextualizable, Initializable, Disposable
  {
  
      /**
       * The XML namespace for the output document.
       */
      protected final static String namespace = "http://apache.org/cocoon/search/1.0";
  
      /**
       * The XML namespace for xlink
       */
      protected final static String xlinkNamespace = "http://www.w3.org/1999/xlink";
  
      /**
       * Attribute type passed along with every attribute added to the
       * generated elements, ie <code>CDATA</code>.
       */
      protected final static String CDATA = "CDATA";
  
      /**
       * Root element of generated xml content, ie <code>results</code>.
       */
      protected final static String RESULTS_ELEMENT = "results";
  
      /**
       * Attribute <code>date</code> of <code>results</code> element.
       * It contains the date as a long value (the current time in milliseconds),
       * indicating when a search generated this xml content.
       */
      protected final static String DATE_ATTRIBUTE = "date";
  
      /**
       * Attribute <code>query-string</code> of <code>results</code> element.
       * Echos the <code>queryString</code> query parameter.
       * The attribute is only generated for a non-empty query string.
       */
      protected final static String QUERY_STRING_ATTRIBUTE = "query-string";
  
      /**
       * Attribute <code>start-index</code> of <code>results</code> element.
       * Echos the <code>startIndex</code> query parameter.
       */
      protected final static String START_INDEX_ATTRIBUTE = "start-index";
  
      /**
       * Attribute <code>page-length</code> of <code>results</code> element.
       * Echos the <code>pageLength</code> query parameter.
       */
      protected final static String PAGE_LENGTH_ATTRIBUTE = "page-length";
  
      /**
       * Child element of generated xml content, ie <code>hits</code>.
       * This element describes all hits.
       */
      protected final static String HITS_ELEMENT = "hits";
  
      /**
       * Attribute <code>total-count</code> of <code>hits</code> element.
       * The value describes total number of hits found by the search engine.
       */
      protected final static String TOTAL_COUNT_ATTRIBUTE = "total-count";
  
      /**
       * Attribute <code>count-of-pages</code> of <code>hits</code> element.
       * The value describes number of pages needed for all hits.
       */
      protected final static String COUNT_OF_PAGES_ATTRIBUTE = "count-of-pages";
  
      /**
       * Child element of generated xml content, ie <code>hit</code>.
       * This element describes a single hit.
       */
      protected final static String HIT_ELEMENT = "hit";
  
      /**
       * Attribute <code>rank</code> of <code>hit</code> element.
       * The value describes the count index of this hit, ranging between 0, and
       * total-count minus 1.
       */
      protected final static String RANK_ATTRIBUTE = "rank";
  
      /**
       * Attribute <code>score</code> of <code>hit</code> element.
       * The value describes the score of this hit, ranging between 0, and 1.0.
       */
      protected final static String SCORE_ATTRIBUTE = "score";
  
      /**
       * Attribute <code>uri</code> of <code>hit</code> element.
       * The value describes the uri of a document matching the search query.
       */
      protected final static String URI_ATTRIBUTE = "uri";
  
      /**
       * Child element <code>field</code> of the <code>hit</code> element.
       * This element contains value of the stored field of a hit.
       *
       * @since 2.0.4
       */
      protected final static String FIELD_ELEMENT = "field";
  
      /**
       * Child element of generated xml content, ie <code>navigation</code>.
       * This element describes some hints for easier navigation.
       */
      protected final static String NAVIGATION_ELEMENT = "navigation";
  
      /**
       * Child element of the <code>navigation</code> element,
       * ie <code>navigation-page</code>.
       * This element describes the start-index of page containing hits.
       */
      protected final static String NAVIGATION_PAGE_ELEMENT = "navigation-page";
  
      /**
       * Attribute <code>has-next</code> of <code>navigation</code> element.
       * The value is true if a next navigation control should be presented.
       */
      protected final static String HAS_NEXT_ATTRIBUTE = "has-next";
  
      /**
       * Attribute <code>has-previous</code> of <code>navigation</code> element.
       * The value is true if a previous navigation control should be presented.
       */
      protected final static String HAS_PREVIOUS_ATTRIBUTE = "has-previous";
  
      /**
       * Attribute <code>next-index</code> of <code>navigation</code> element.
       * The value describes the start-index of the next-to-be-presented page.
       */
      protected final static String NEXT_INDEX_ATTRIBUTE = "next-index";
  
      /**
       * Attribute <code>previous-index</code> of <code>navigation</code> element.
       * The value describes the start-index of the previous-to-be-presented page.
       */
      protected final static String PREVIOUS_INDEX_ATTRIBUTE = "previous-index";
  
      /**
       * Setup parameter name of index directory, ie <code>index</code>.
       * A request parameter of the same name is consulted as well.
       */
      protected final static String INDEX_PARAM = "index";
  
      /**
       * Default value of setup parameter <code>index</code>, ie <code>index</code>.
       */
      protected final static String INDEX_PARAM_DEFAULT = "index";
  
      /**
       * Setup the actual query from generator parameter,
       * ie <code>query</code>.
       */
      protected final static String QUERY_PARAM = "query";
  
      /**
       * Setup parameter name specifying the name of query-string query parameter,
       * ie <code>query-string</code>.
       */
      protected final static String QUERY_STRING_PARAM = "query-string";
  
      /**
       * Default value of setup parameter <code>query-string</code>, ie <code>queryString</code>.
       */
      protected final static String QUERY_STRING_PARAM_DEFAULT = "queryString";
  
      /**
       * Setup parameter name specifying the name of start-index query parameter,
       * ie <code>start-index</code>.
       */
      protected final static String START_INDEX_PARAM = "start-index";
  
      /**
       * Default value of setup parameter <code>start-index</code>, ie <code>startIndex</code>.
       */
      protected final static String START_INDEX_PARAM_DEFAULT = "startIndex";
  
      /**
       * Setup parameter name specifying the name of start-next-index query parameter,
       * ie <code>start-next-index</code>.
       */
      protected final static String START_INDEX_NEXT_PARAM = "start-next-index";
  
      /**
       * Default value of setup parameter <code>start-next-index</code>, ie <code>startNextIndex</code>.
       */
      protected final static String START_INDEX_NEXT_PARAM_DEFAULT = "startNextIndex";
  
      /**
       * Setup parameter name specifying the name of start-previous-index query parameter,
       * ie <code>start-previous-index</code>.
       */
      protected final static String START_INDEX_PREVIOUS_PARAM = "start-previous-index";
  
      /**
       * Default value of setup parameter <code>start-previous-index</code>, ie <code>startPreviousIndex</code>.
       */
      protected final static String START_INDEX_PREVIOUS_PARAM_DEFAULT = "startPreviousIndex";
  
      /**
       * Start index used when no start index has been set by the time the
       * content is generated, ie <code>0</code>.
       */
      protected final static int START_INDEX_DEFAULT = 0;
  
      /**
       * Setup parameter name specifying the name of page-length query parameter,
       * ie <code>page-length</code>.
       */
      protected final static String PAGE_LENGTH_PARAM = "page-length";
  
      /**
       * Default value of setup parameter <code>page-length</code>, ie <code>pageLength</code>.
       */
      protected final static String PAGE_LENGTH_PARAM_DEFAULT = "pageLength";
  
      /**
       * Page length used when no page length has been set by the time the
       * content is generated, ie <code>10</code>.
       */
      protected final static int PAGE_LENGTH_DEFAULT = 10;
  
  
  
      /**
       * Default home directory of index directories.
       * <p>
       *   Relative index directories specified in the setup of this generator are resolved
       *   relative to this directory.
       * </p>
       * <p>
       *   It is taken from the context entry <code>Constants.CONTEXT_WORK_DIR</code>,
       *   ie the <code>WORKING_DIR</code> of Cocoon.
       * </p>
       */
      private File workDir = null;
  
      /**
       * The avalon component to use for searching.
       * NOTE(review): the lookup in <code>compose</code> is commented out, so
       * this field is presumably assigned where the hits are built - confirm.
       */
      private LuceneCocoonSearcher lcs;
  
      /**
       * Absolute filesystem directory of lucene index directory
       */
      private File index = null;
  
      /**
       * Query-string to search for
       */
      private String queryString = "";
  
      /**
       * Attributes used when generating xml content.
       * The single instance is cleared and refilled for each generated element.
       */
      private final AttributesImpl atts = new AttributesImpl();
  
      /**
       * startIndex of query parameter; <code>null</code> as long as no value
       * has been read from the request.
       */
      private Integer startIndex = null;
  
      /**
       * pageLength of query parameter; <code>null</code> as long as no value
       * has been read from the request.
       */
      private Integer pageLength = null;
  
  
      // TODO: parameterize()
  
      /**
       * Set the current <code>ComponentManager</code> instance used by this
       * <code>Composable</code>.
       * <p>
       *  Only delegates to the super class; the lookup of the searcher
       *  component is disabled here.
       * </p>
       *
       * @param  manager                 the component manager handed to the super class
       * @exception  ComponentException  propagated from the super class
       */
      public void compose(ComponentManager manager) throws ComponentException {
          super.compose(manager);
  //        lcs = (LuceneCocoonSearcher) this.manager.lookup(LuceneCocoonSearcher.ROLE);
      }
  
      /**
       * Set up all members of this generator for the current request.
       * <ul>
       *  <li>index directory: the sitemap parameter <code>index</code>, replaced
       *   by the request parameter <code>index</code> if present; a relative
       *   directory is resolved against the work directory.</li>
       *  <li>query string: the sitemap parameter <code>query</code>; only if that
       *   is empty the request parameter named by <code>query-string</code>.</li>
       *  <li>start index: the first parsable value of the request parameters
       *   named by <code>start-next-index</code>,
       *   <code>start-previous-index</code> and <code>start-index</code>.</li>
       *  <li>page length: the request parameter named by
       *   <code>page-length</code>.</li>
       * </ul>
       * <p>
       *  Start index and page length are both reset first, so that no value of
       *  an earlier request survives when this instance is set up again.
       * </p>
       *
       * @param  resolver                 passed on to the super class
       * @param  objectModel              the object model providing the request
       * @param  src                      passed on to the super class
       * @param  par                      the sitemap parameters of this generator
       * @exception  ProcessingException  propagated from the super class
       * @exception  SAXException         propagated from the super class
       * @exception  IOException          propagated from the super class
       */
      public void setup(SourceResolver resolver, Map objectModel, String src, Parameters par)
               throws ProcessingException, SAXException, IOException {
          super.setup(resolver, objectModel, src, par);

          String param_name;
          String param_value;
          Request request = ObjectModelHelper.getRequest(objectModel);

          // get the analyzer
  //        Analyzer analyzer = LuceneCocoonHelper.getAnalyzer("org.apache.lucene.analysis.standard.StandardAnalyzer");
  //        lcs.setAnalyzer(analyzer);

          // NOTE(review): the request parameter takes precedence here, so a
          // client can choose the directory which is opened as index, including
          // an absolute one - confirm that this is intended.
          String index_file_name = par.getParameter(INDEX_PARAM, INDEX_PARAM_DEFAULT);
          param_value = request.getParameter(INDEX_PARAM);
          if (param_value != null) {
              index_file_name = param_value;
          }

          // now set the index
          index = new File(index_file_name);
          if (!index.isAbsolute()) {
              index = new File(workDir, index.toString());
          }

          // try getting the queryString from the generator sitemap params
          queryString = par.getParameter(QUERY_PARAM, "");

          // try getting the queryString from the request params
          if (queryString.equals("")) {
              param_name = par.getParameter(QUERY_STRING_PARAM, QUERY_STRING_PARAM_DEFAULT);
              param_value = request.getParameter(param_name);
              if (param_value != null) {
                  queryString = param_value;
              }
          }

          // always try lookup the start index from the request params
          // get startIndex
          startIndex = null;
          param_name = par.getParameter(START_INDEX_NEXT_PARAM, START_INDEX_NEXT_PARAM_DEFAULT);
          param_value = request.getParameter(param_name);
          if (param_value != null) {
              startIndex = createInteger(param_value);
          }

          if (startIndex == null) {
              param_name = par.getParameter(START_INDEX_PREVIOUS_PARAM, START_INDEX_PREVIOUS_PARAM_DEFAULT);
              param_value = request.getParameter(param_name);
              if (param_value != null) {
                  startIndex = createInteger(param_value);
              }
          }
          if (startIndex == null) {
              param_name = par.getParameter(START_INDEX_PARAM, START_INDEX_PARAM_DEFAULT);
              param_value = request.getParameter(param_name);
              if (param_value != null) {
                  startIndex = createInteger(param_value);
              }
          }

          // get pageLength; reset it like startIndex, otherwise the page length
          // of a previous request would still be in effect for a request which
          // does not supply one
          pageLength = null;
          param_name = par.getParameter(PAGE_LENGTH_PARAM, PAGE_LENGTH_PARAM_DEFAULT);
          param_value = request.getParameter(param_name);
          if (param_value != null) {
              pageLength = createInteger(param_value);
          }
      }
  
  
      /**
       * Contextualize this class.
       *
       * <p>
       *   Retrieves the work directory from the context entry
       *   <code>Constants.CONTEXT_WORK_DIR</code>.
       *   If the index directory is specified relatively, the working directory is
       *   used as home directory of the index directory.
       * </p>
       *
       * @param  context               Context to use
       * @exception  ContextException  If contextualizing fails.
       */
      public void contextualize(Context context) throws ContextException {
          // retrieve the working directory, assuming that the index may reside there
          workDir = (File) context.get(Constants.CONTEXT_WORK_DIR);
      }
  
      /**
       * Initialize this generator.
       * <p>
       *  Currently does nothing: setting the index directory on the searcher
       *  at this point is disabled.
       * </p>
       *
       * @exception  IOException  declared only; not thrown by this implementation
       */
      public void initialize() throws IOException {
          // get the directory where the index resides
  //        Directory directory = LuceneCocoonHelper.getDirectory(new File(workDir, "index"), false);
  //        lcs.setDirectory(directory);
      }
  
      /**
       * Generate xml content describing search results.
       * Entry point of the ComposerGenerator.
       * <p>
       *  Falls back to the default start index and page length if none have been
       *  set, then emits the document: the prefixes <code>search</code> and
       *  <code>xlink</code> are mapped, the results are generated, and both
       *  mappings are ended again under the very prefixes they were started with.
       * </p>
       *
       * @exception  IOException       when there is a problem reading the from file system.
       * @throws  SAXException         when there is a problem creating the output SAX events.
       * @throws  ProcessingException  when there is a problem obtaining the hits
       */
      public void generate() throws IOException, SAXException, ProcessingException {
          // set default parameter value, in case of no values are set yet.
          if (startIndex == null) {
              startIndex = new Integer(START_INDEX_DEFAULT);
          }
          if (pageLength == null) {
              pageLength = new Integer(PAGE_LENGTH_DEFAULT);
          }

          // Start the document and set the namespace.
          this.contentHandler.startDocument();
          this.contentHandler.startPrefixMapping("search", namespace);
          this.contentHandler.startPrefixMapping("xlink", xlinkNamespace);

          generateResults();

          // End the document. Each mapping has to be ended with the prefix it
          // was started with ("search", not the empty default prefix), in
          // reverse order of the start events.
          this.contentHandler.endPrefixMapping("xlink");
          this.contentHandler.endPrefixMapping("search");
          this.contentHandler.endDocument();
      }
  
  
      /**
       * Convert a String into an Integer.
       * <p>
       *   A value which is not a valid integer does not raise an exception:
       *   a warning is logged (if warnings are enabled) and null is returned.
       * </p>
       *
       * @param  s  the text to convert
       * @return    the Integer value of s, or null if s cannot be converted
       */
      private Integer createInteger(String s) {
          try {
              return Integer.valueOf(s);
          } catch (NumberFormatException nfe) {
              // not fatal: report it and let the caller cope with null
              if (getLogger().isWarnEnabled()) {
                  getLogger().warn("Cannot convert " + s + " to Integer", nfe);
              }
              return null;
          }
      }
  
  
      /**
       * Build the hits and generate the <code>results</code> element.
       * <p>
       *  The root element carries the current time, the query string (only if
       *  it is not empty), the start index and the page length as attributes;
       *  its content is the xml of the hits followed by the navigation.
       * </p>
       *
       * @throws  SAXException         when there is a problem creating the output SAX events.
       * @throws  ProcessingException  when there is a problem obtaining the hits
       */
      private void generateResults() throws SAXException, ProcessingException {

          // search first, then take the time stamp of this result
          final LuceneCocoonPager hitsPager = buildHits();
          final String now = String.valueOf(System.currentTimeMillis());

          final boolean hasQuery = (queryString != null) && (queryString.length() > 0);

          // attributes of the root element
          atts.clear();
          atts.addAttribute(namespace, DATE_ATTRIBUTE, DATE_ATTRIBUTE, CDATA, now);
          if (hasQuery) {
              atts.addAttribute(namespace, QUERY_STRING_ATTRIBUTE, QUERY_STRING_ATTRIBUTE,
                  CDATA, String.valueOf(queryString));
          }
          atts.addAttribute(namespace, START_INDEX_ATTRIBUTE, START_INDEX_ATTRIBUTE,
              CDATA, String.valueOf(startIndex));
          atts.addAttribute(namespace, PAGE_LENGTH_ATTRIBUTE, PAGE_LENGTH_ATTRIBUTE,
              CDATA, String.valueOf(pageLength));

          // root element wrapping hits and navigation
          contentHandler.startElement(namespace, RESULTS_ELEMENT, RESULTS_ELEMENT, atts);
          generateHits(hitsPager);
          generateNavigation(hitsPager);
          contentHandler.endElement(namespace, RESULTS_ELEMENT, RESULTS_ELEMENT);
      }
  
  
      /**
       * Generate the <code>hits</code> element wrapping all hits of a page.
       * <p>
       *  Nothing is generated if there is no pager, or if the pager has no
       *  page to deliver.
       * </p>
       *
       * @param  pager                 the LuceneContentPager with the search results
       * @throws  SAXException         when there is a problem creating the output SAX events.
       */
      private void generateHits(LuceneCocoonPager pager) throws SAXException {
          if (pager == null || !pager.hasNext()) {
              return;
          }

          atts.clear();

          final String totalHits = String.valueOf(pager.getCountOfHits());
          atts.addAttribute(namespace, TOTAL_COUNT_ATTRIBUTE, TOTAL_COUNT_ATTRIBUTE, CDATA, totalHits);

          final String totalPages = String.valueOf(pager.getCountOfPages());
          atts.addAttribute(namespace, COUNT_OF_PAGES_ATTRIBUTE, COUNT_OF_PAGES_ATTRIBUTE, CDATA, totalPages);

          contentHandler.startElement(namespace, HITS_ELEMENT, HITS_ELEMENT, atts);
          generateHit(pager);
          contentHandler.endElement(namespace, HITS_ELEMENT, HITS_ELEMENT);
      }
  
  
      /**
       * Emits one <code>HIT_ELEMENT</code> for every hit on the page returned
       * by <code>pager.next()</code>.
       *
       * <p>Each hit carries its rank (counted upwards from the pager's start
       * index), its score and its URI as attributes. Every stored field of the
       * hit's document, except the URL field, is written as a nested
       * <code>FIELD_ELEMENT</code> whose <code>name</code> attribute is the
       * field name and whose text content is the field value.</p>
       *
       * @param  pager                 the LuceneCocoonPager with the search results.
       * @throws  SAXException         when there is a problem creating the output SAX events.
       */
      private void generateHit(LuceneCocoonPager pager) throws SAXException {
          // rank of the first hit on this page
          int rank = pager.getStartIndex();

          // the hits that belong onto a single page
          List pageOfHits = (List) pager.next();
          Iterator hitIterator = pageOfHits.iterator();
          while (hitIterator.hasNext()) {
              LuceneCocoonPager.HitWrapper wrapper = (LuceneCocoonPager.HitWrapper) hitIterator.next();
              Document hitDocument = wrapper.getDocument();
              float hitScore = wrapper.getScore();
              String hitUri = hitDocument.get(LuceneXMLIndexer.URL_FIELD);

              atts.clear();
              atts.addAttribute(namespace, RANK_ATTRIBUTE, RANK_ATTRIBUTE, CDATA, String.valueOf(rank));
              atts.addAttribute(namespace, SCORE_ATTRIBUTE, SCORE_ATTRIBUTE, CDATA, String.valueOf(hitScore));
              atts.addAttribute(namespace, URI_ATTRIBUTE, URI_ATTRIBUTE, CDATA, String.valueOf(hitUri));
              contentHandler.startElement(namespace, HIT_ELEMENT, HIT_ELEMENT, atts);

              // fix me, add here a summary of this hit
              Enumeration fields = hitDocument.fields();
              while (fields.hasMoreElements()) {
                  Field current = (Field) fields.nextElement();
                  // The URL is already exposed through the uri attribute of the hit.
                  if (current.isStored() && !LuceneXMLIndexer.URL_FIELD.equals(current.name())) {
                      atts.clear();
                      atts.addAttribute(namespace, "name", "name", CDATA, current.name());
                      contentHandler.startElement(namespace, FIELD_ELEMENT, FIELD_ELEMENT, atts);
                      String fieldValue = current.stringValue();
                      contentHandler.characters(fieldValue.toCharArray(), 0, fieldValue.length());
                      contentHandler.endElement(namespace, FIELD_ELEMENT, FIELD_ELEMENT);
                  }
              }

              contentHandler.endElement(namespace, HIT_ELEMENT, HIT_ELEMENT);
              rank++;
          }
      }
  
  
      /**
       * Emits the <code>NAVIGATION_ELEMENT</code> describing the paging state.
       *
       * <p>The element's attributes report the total hit count, the page count,
       * whether a next/previous page exists and the next/previous index as
       * reported by the pager. It contains one
       * <code>NAVIGATION_PAGE_ELEMENT</code> child per page, each carrying the
       * start index of that page (multiples of <code>pageLength</code>).
       * Nothing is emitted when <code>pager</code> is <code>null</code>.</p>
       *
       * @param  pager                 the LuceneCocoonPager with the search results, may be <code>null</code>
       * @throws  SAXException         when there is a problem creating the output SAX events.
       */
      private void generateNavigation(LuceneCocoonPager pager) throws SAXException {
          if (pager == null) {
              return;
          }

          // attributes of the navigation element itself
          atts.clear();
          atts.addAttribute(namespace, TOTAL_COUNT_ATTRIBUTE, TOTAL_COUNT_ATTRIBUTE,
              CDATA, String.valueOf(pager.getCountOfHits()));
          atts.addAttribute(namespace, COUNT_OF_PAGES_ATTRIBUTE, COUNT_OF_PAGES_ATTRIBUTE,
              CDATA, String.valueOf(pager.getCountOfPages()));
          atts.addAttribute(namespace, HAS_NEXT_ATTRIBUTE, HAS_NEXT_ATTRIBUTE,
              CDATA, String.valueOf(pager.hasNext()));
          atts.addAttribute(namespace, HAS_PREVIOUS_ATTRIBUTE, HAS_PREVIOUS_ATTRIBUTE,
              CDATA, String.valueOf(pager.hasPrevious()));
          atts.addAttribute(namespace, NEXT_INDEX_ATTRIBUTE, NEXT_INDEX_ATTRIBUTE,
              CDATA, String.valueOf(pager.nextIndex()));
          atts.addAttribute(namespace, PREVIOUS_INDEX_ATTRIBUTE, PREVIOUS_INDEX_ATTRIBUTE,
              CDATA, String.valueOf(pager.previousIndex()));
          contentHandler.startElement(namespace, NAVIGATION_ELEMENT, NAVIGATION_ELEMENT, atts);

          // one empty navigation-page child per page
          final int pageTotal = pager.getCountOfPages();
          int pageNumber = 0;
          int firstHitOfPage = 0;
          while (pageNumber < pageTotal) {
              atts.clear();
              atts.addAttribute(namespace, START_INDEX_ATTRIBUTE, START_INDEX_ATTRIBUTE,
                  CDATA, String.valueOf(firstHitOfPage));
              contentHandler.startElement(namespace, NAVIGATION_PAGE_ELEMENT, NAVIGATION_PAGE_ELEMENT, atts);
              contentHandler.endElement(namespace, NAVIGATION_PAGE_ELEMENT, NAVIGATION_PAGE_ELEMENT);

              pageNumber++;
              firstHitOfPage += pageLength.intValue();
          }

          contentHandler.endElement(namespace, NAVIGATION_ELEMENT, NAVIGATION_ELEMENT);
      }
  
  
      /**
       * Runs the current query string against the body field of the index and
       * wraps the resulting hits in a pager.
       *
       * <p>The searcher component is looked up, configured with a
       * StandardAnalyzer and the index directory, used for one search and
       * released again before this method returns. The pager's start index and
       * page length are only set when the corresponding request values are
       * present; non-positive values are replaced by <code>0</code> and by the
       * number of hits respectively.</p>
       *
       * @return  a pager over the hits, or <code>null</code> when there is no query string
       * @throws  ProcessingException  iff an error occurs
       */
      private LuceneCocoonPager buildHits() throws ProcessingException {
          // Without a query there is nothing to search for.
          if (queryString == null || queryString.length() == 0) {
              return null;
          }

          Hits hits = null;

          // TODO (VG): Move parts into compose/initialize/recycle
          try {
              lcs = (LuceneCocoonSearcher) this.manager.lookup(LuceneCocoonSearcher.ROLE);
              lcs.setAnalyzer(
                  LuceneCocoonHelper.getAnalyzer("org.apache.lucene.analysis.standard.StandardAnalyzer"));
              // point the searcher at the directory where the index resides
              lcs.setDirectory(LuceneCocoonHelper.getDirectory(index, false));
              hits = lcs.search(queryString, LuceneXMLIndexer.BODY_FIELD);
          } catch (ComponentException ce) {
              throw new ProcessingException("ComponentException in search", ce);
          } catch (IOException ioe) {
              throw new ProcessingException("IOException in search", ioe);
          } finally {
              if (lcs != null) {
                  this.manager.release(lcs);
                  lcs = null;
              }
          }

          // the pager gives access to one range (page) of the hits at a time
          LuceneCocoonPager pager = new LuceneCocoonPager(hits);

          if (this.startIndex != null) {
              // negative offsets are clamped to the first hit
              pager.setStartIndex(Math.max(this.startIndex.intValue(), 0));
          }

          if (this.pageLength != null) {
              int requestedLength = this.pageLength.intValue();
              // a non-positive page length means "all hits on one page"
              pager.setCountOfHitsPerPage(requestedLength > 0 ? requestedLength : hits.length());
          }

          return pager;
      }
  
      /**
       * Recycle the generator: after the superclass has been recycled, the
       * index location, the paging values and the query string of the
       * finished request are cleared.
       */
      public void recycle() {
          super.recycle();
          this.index = null;
          this.pageLength = null;
          this.startIndex = null;
          this.queryString = null;
      }
  
      /**
       * Dispose the generator; only delegates to the superclass.
       */
      public void dispose() {
          // The searcher release below is disabled: buildHits() already releases
          // the searcher and resets lcs to null in its finally block.
  //        if (lcs != null) {
  //            this.manager.release(lcs);
  //            lcs = null;
  //        }
          super.dispose();
      }
  }
  
  
  
  
  1.1                  xml-cocoon2/src/blocks/lucene/java/org/apache/cocoon/transformation/LuceneIndexTransformer.java
  
  Index: LuceneIndexTransformer.java
  ===================================================================
  /*
  
   ============================================================================
                     The Apache Software License, Version 1.1
   ============================================================================
  
   Copyright (C) 1999-2002 The Apache Software Foundation. All rights reserved.
  
   Redistribution and use in source and binary forms, with or without modifica-
   tion, are permitted provided that the following conditions are met:
  
   1. Redistributions of  source code must  retain the above copyright  notice,
      this list of conditions and the following disclaimer.
  
   2. Redistributions in binary form must reproduce the above copyright notice,
      this list of conditions and the following disclaimer in the documentation
      and/or other materials provided with the distribution.
  
   3. The end-user documentation included with the redistribution, if any, must
      include  the following  acknowledgment:  "This product includes  software
      developed  by the  Apache Software Foundation  (http://www.apache.org/)."
      Alternately, this  acknowledgment may  appear in the software itself,  if
      and wherever such third-party acknowledgments normally appear.
  
   4. The names "Apache Cocoon" and  "Apache Software Foundation" must  not  be
      used to  endorse or promote  products derived from  this software without
      prior written permission. For written permission, please contact
      apache@apache.org.
  
   5. Products  derived from this software may not  be called "Apache", nor may
      "Apache" appear  in their name,  without prior written permission  of the
      Apache Software Foundation.
  
   THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
   INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
   FITNESS  FOR A PARTICULAR  PURPOSE ARE  DISCLAIMED.  IN NO  EVENT SHALL  THE
   APACHE SOFTWARE  FOUNDATION  OR ITS CONTRIBUTORS  BE LIABLE FOR  ANY DIRECT,
   INDIRECT, INCIDENTAL, SPECIAL,  EXEMPLARY, OR CONSEQUENTIAL  DAMAGES (INCLU-
   DING, BUT NOT LIMITED TO, PROCUREMENT  OF SUBSTITUTE GOODS OR SERVICES; LOSS
   OF USE, DATA, OR  PROFITS; OR BUSINESS  INTERRUPTION)  HOWEVER CAUSED AND ON
   ANY  THEORY OF LIABILITY,  WHETHER  IN CONTRACT,  STRICT LIABILITY,  OR TORT
   (INCLUDING  NEGLIGENCE OR  OTHERWISE) ARISING IN  ANY WAY OUT OF THE  USE OF
   THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  
   This software  consists of voluntary contributions made  by many individuals
   on  behalf of the Apache Software  Foundation and was  originally created by
   Stefano Mazzocchi  <st...@apache.org>. For more  information on the Apache
   Software Foundation, please see <http://www.apache.org/>.
  
  */
  package org.apache.cocoon.transformation;
  
  import org.apache.avalon.framework.activity.Disposable;
  import org.apache.avalon.framework.component.ComponentException;
  import org.apache.avalon.framework.component.ComponentManager;
  import org.apache.avalon.framework.configuration.Configurable;
  import org.apache.avalon.framework.configuration.Configuration;
  import org.apache.avalon.framework.configuration.ConfigurationException;
  import org.apache.avalon.framework.logger.AbstractLoggable;
  import org.apache.avalon.framework.context.ContextException;
  import org.apache.avalon.framework.context.Contextualizable;
  import org.apache.avalon.framework.context.Context;
  import org.apache.avalon.framework.parameters.Parameters;
  import org.apache.avalon.excalibur.pool.Recyclable;
  
  import org.apache.cocoon.ProcessingException;
  import org.apache.cocoon.Constants;
  import org.apache.cocoon.caching.Cacheable;
  import org.apache.cocoon.caching.NOPCacheValidity;
  import org.apache.cocoon.caching.CacheValidity;
  import org.apache.cocoon.environment.SourceResolver;
  import org.apache.cocoon.components.search.LuceneXMLIndexer;
  import org.apache.cocoon.components.search.LuceneCocoonHelper;
  
  import org.apache.lucene.index.IndexWriter;
  import org.apache.lucene.store.Directory;
  import org.apache.lucene.document.Document;
  import org.apache.lucene.document.Field;
  import org.apache.lucene.analysis.Analyzer;
  
  import org.xml.sax.Attributes;
  import org.xml.sax.SAXException;
  import org.xml.sax.helpers.AttributesImpl;
  
  import java.io.IOException;
  import java.io.File;
  import java.util.Stack;
  import java.util.Map;
  
  /**
   * A lucene index creation transformer.
   *
   * <p>The transformer watches the SAX stream for elements in the
   * {@link #LUCENE_URI} namespace and builds a Lucene index from them:</p>
   * <ul>
   *   <li><code>&lt;lucene:index&gt;</code> opens an <code>IndexWriter</code>.
   *       The optional attributes <code>analyzer</code>, <code>directory</code>,
   *       <code>create</code> (<code>yes</code>/<code>true</code>) and
   *       <code>merge-factor</code> override the sitemap parameters, which in
   *       turn override the component configuration. The directory is resolved
   *       relative to the Cocoon work directory. The closing tag optimizes and
   *       closes the index.</li>
   *   <li><code>&lt;lucene:document url="..."&gt;</code> (only allowed directly
   *       inside <code>lucene:index</code>) starts a new Lucene document; the
   *       <code>url</code> attribute is mandatory. The closing tag adds the
   *       document to the index with a body field (all text), an unindexed URL
   *       field and a UID field derived from the URL.</li>
   *   <li>Every element inside a document becomes a field named after the
   *       element's local name holding its own character data, and every
   *       non-lucene attribute becomes a field named
   *       <code>element@attribute</code>. The presence of a
   *       <code>lucene:text-attr</code> attribute also appends the attribute
   *       values to the element and body text; the presence of a
   *       <code>lucene:store</code> attribute makes the element field stored.</li>
   * </ul>
   *
   * <p>Elements and character data outside <code>lucene:index</code> are passed
   * on unchanged; the <code>lucene:index</code> element and everything inside it
   * are consumed.</p>
   *
   * @author     <a href="mailto:vgritsenko@apache.org">Vadim Gritsenko</a>
   * @version    CVS $Revision: 1.1 $ $Date: 2002/12/18 23:44:51 $
   */
  public class LuceneIndexTransformer extends AbstractTransformer
      implements Disposable, Cacheable, Recyclable, Configurable, Contextualizable {

      public static final String ANALYZER_CLASSNAME_CONFIG = "analyzer-classname";
      public static final String ANALYZER_CLASSNAME_PARAMETER = "analyzer-classname";
      public static final String ANALYZER_CLASSNAME_DEFAULT = "org.apache.lucene.analysis.standard.StandardAnalyzer";
      public static final String DIRECTORY_CONFIG = "directory";
      public static final String DIRECTORY_PARAMETER = "directory";
      public static final String DIRECTORY_DEFAULT = "index";
      public static final String MERGE_FACTOR_CONFIG = "merge-factor";
      public static final String MERGE_FACTOR_PARAMETER = "merge-factor";
      public static final int MERGE_FACTOR_DEFAULT = 20;

      public static final String LUCENE_URI = "http://apache.org/cocoon/lucene/1.0";
      public static final String LUCENE_QUERY_ELEMENT = "index";
      public static final String LUCENE_QUERY_ANALYZER_ATTRIBUTE = "analyzer";
      public static final String LUCENE_QUERY_DIRECTORY_ATTRIBUTE = "directory";
      public static final String LUCENE_QUERY_CREATE_ATTRIBUTE = "create";
      public static final String LUCENE_QUERY_MERGE_FACTOR_ATTRIBUTE = "merge-factor";
      public static final String LUCENE_DOCUMENT_ELEMENT = "document";
      public static final String LUCENE_DOCUMENT_URL_ATTRIBUTE = "url";
      public static final String LUCENE_ELEMENT_ATTR_TO_TEXT_ATTRIBUTE = "text-attr";
      public static final String LUCENE_ELEMENT_ATTR_STORE_VALUE = "store";

      // Values of the processing state
      /** Outside of any lucene:index element: events are passed through. */
      private static final int STATE_PASS_THROUGH = 0;
      /** Inside lucene:index, between documents. */
      private static final int STATE_IN_INDEX = 1;
      /** Inside a lucene:document: content is collected for indexing. */
      private static final int STATE_IN_DOCUMENT = 2;

      // Initialization time variables
      protected ComponentManager manager = null;
      protected File workDir = null;

      // Declaration time parameters values
      private String analyzerClassnameDefault;
      private String directoryDefault;
      private int mergeFactorDefault;

      // Invocation time parameters values
      private String analyzerClassname;
      private String directory;
      private int mergeFactor;


      // Runtime variables
      private int processing;
      private IndexWriter writer;
      private StringBuffer bodyText;
      private Document bodyDocument;
      private String bodyDocumentURL;
      private Stack elementStack = new Stack();


      /**
       * Derives the value of the UID field from a document URL by replacing
       * every '/' with the NUL character.
       */
      private static String uid(String url) {
          return url.replace('/', '\u0000'); // + "\u0000" + DateField.timeToString(urlConnection.getLastModified());
      }

      /**
       * Tells whether the given element name denotes the lucene element
       * with the given local name.
       */
      private static boolean isLuceneElement(String namespaceURI, String localName, String expected) {
          return LUCENE_URI.equals(namespaceURI) && expected.equals(localName);
      }


      /**
       * Reads the declaration time defaults (analyzer class name, merge factor
       * and index directory) from the component configuration, falling back to
       * the <code>*_DEFAULT</code> constants.
       */
      public void configure(Configuration conf) throws ConfigurationException {
          this.analyzerClassnameDefault = conf.getChild(ANALYZER_CLASSNAME_CONFIG)
              .getValue(ANALYZER_CLASSNAME_DEFAULT);
          this.mergeFactorDefault = conf.getChild(MERGE_FACTOR_CONFIG)
              .getValueAsInteger(MERGE_FACTOR_DEFAULT);
          this.directoryDefault = conf.getChild(DIRECTORY_CONFIG)
              .getValue(DIRECTORY_DEFAULT);
      }

      /**
       * Setup the transformer.
       * Only the sitemap parameters are used; each one falls back to the
       * value obtained in {@link #configure(Configuration)}.
       */
      public void setup(SourceResolver resolver, Map objectModel, String src, Parameters parameters)
      throws ProcessingException, SAXException, IOException {
          this.analyzerClassname = parameters.getParameter(ANALYZER_CLASSNAME_PARAMETER, analyzerClassnameDefault);
          this.directory = parameters.getParameter(DIRECTORY_PARAMETER, directoryDefault);
          this.mergeFactor = parameters.getParameterAsInteger(MERGE_FACTOR_PARAMETER, mergeFactorDefault);
      }

      /**
       * Remembers the component manager.
       */
      public void compose(ComponentManager manager) throws ComponentException {
          this.manager = manager;
      }

      /**
       * Contextualize this class: obtains the work directory against which
       * index directory names are resolved.
       */
      public void contextualize(Context context) throws ContextException {
          this.workDir = (File) context.get(Constants.CONTEXT_WORK_DIR);
      }

      /**
       * Resets all runtime state. An index writer that is still open (for
       * instance because processing was aborted inside lucene:index) is closed.
       */
      public void recycle() {
          this.processing = STATE_PASS_THROUGH;
          if (this.writer != null) {
              try {
                  this.writer.close();
              } catch (IOException ignored) {
                  // Best effort only: the writer is being abandoned and there
                  // is nobody to report the failure to at this point.
              }
              this.writer = null;
          }
          this.bodyText = null;
          this.bodyDocument = null;
          this.bodyDocumentURL = null;
          this.elementStack.clear();
      }

      /**
       * Nothing to dispose; an open writer is closed in {@link #recycle()}.
       */
      public void dispose() {
      }

      /**
       * Generate the unique key.
       * This key must be unique inside the space of this component.
       *
       * @return always the constant <code>1</code>
       */
      public long generateKey() {
          return 1;
      }

      /**
       * Generate the validity object.
       *
       * @return always {@link NOPCacheValidity#CACHE_VALIDITY}
       */
      public CacheValidity generateValidity() {
          return NOPCacheValidity.CACHE_VALIDITY;
      }



      public void startDocument() throws SAXException {
          super.startDocument();
      }

      public void endDocument() throws SAXException {
          super.endDocument();
      }

      /**
       * Begin the scope of a prefix-URI Namespace mapping.
       * Mappings declared inside lucene:index are not passed on.
       *
       * @param prefix The Namespace prefix being declared.
       * @param uri The Namespace URI the prefix is mapped to.
       */
      public void startPrefixMapping(String prefix, String uri) throws SAXException {
          if (processing == STATE_PASS_THROUGH) {
              super.startPrefixMapping(prefix,uri);
          }
      }

      /**
       * End the scope of a prefix-URI mapping.
       * Mappings ending inside lucene:index are not passed on.
       *
       * @param prefix The prefix that was being mapping.
       */
      public void endPrefixMapping(String prefix) throws SAXException {
          if (processing == STATE_PASS_THROUGH) {
              super.endPrefixMapping(prefix);
          }
      }

      /**
       * Handles the start of an element according to the processing state:
       * opens the index on lucene:index, starts a document on lucene:document,
       * records elements nested in a document, and passes on everything that
       * is outside lucene:index.
       *
       * @throws SAXException if lucene:index contains anything but
       *         lucene:document, if lucene:document lacks the url attribute,
       *         if the merge-factor attribute is not an integer, or if the
       *         index cannot be opened
       */
      public void startElement(String namespaceURI, String localName, String qName, Attributes atts)
          throws SAXException {

          if (processing == STATE_PASS_THROUGH) {
              if (isLuceneElement(namespaceURI, localName, LUCENE_QUERY_ELEMENT)) {
                  openIndexWriter(atts);
                  processing = STATE_IN_INDEX;
              } else {
                  super.startElement(namespaceURI, localName, qName, atts);
              }
          } else if (processing == STATE_IN_INDEX) {
              if (isLuceneElement(namespaceURI, localName, LUCENE_DOCUMENT_ELEMENT)) {
                  beginLuceneDocument(atts);
                  processing = STATE_IN_DOCUMENT;
              } else {
                  throw new SAXException("<lucene:index> element can contain only <lucene:document> elements!");
              }
          } else if (processing == STATE_IN_DOCUMENT) {
              // The attributes are copied because the parser may reuse the object.
              elementStack.push(new IndexHelperField(localName, new AttributesImpl(atts)));
          }
      }

      /**
       * Opens the index writer described by the attributes of lucene:index.
       */
      private void openIndexWriter(Attributes atts) throws SAXException {
          String sCreate = atts.getValue(LUCENE_QUERY_CREATE_ATTRIBUTE);
          boolean create = sCreate != null &&
              (sCreate.equalsIgnoreCase("yes") || sCreate.equalsIgnoreCase("true"));

          String analyzerName = atts.getValue(LUCENE_QUERY_ANALYZER_ATTRIBUTE);
          if (analyzerName == null) {
              analyzerName = this.analyzerClassname;
          }
          Analyzer analyzer = LuceneCocoonHelper.getAnalyzer(analyzerName);

          int factor = this.mergeFactor;
          String sMergeFactor = atts.getValue(LUCENE_QUERY_MERGE_FACTOR_ATTRIBUTE);
          if (sMergeFactor != null) {
              try {
                  factor = Integer.parseInt(sMergeFactor);
              } catch (NumberFormatException e) {
                  // Report bad input as a SAX error instead of letting an
                  // unchecked exception escape from the SAX callback.
                  throw new SAXException("<lucene:index> @" + LUCENE_QUERY_MERGE_FACTOR_ATTRIBUTE
                      + " must be an integer, but was '" + sMergeFactor + "'", e);
              }
          }

          String directoryName = atts.getValue(LUCENE_QUERY_DIRECTORY_ATTRIBUTE);
          if (directoryName == null) {
              directoryName = this.directory;
          }

          try {
              Directory indexDirectory = LuceneCocoonHelper.getDirectory(
                  new File(workDir, directoryName), create);

              writer = new IndexWriter(indexDirectory, analyzer, create);
              writer.mergeFactor = factor;
          } catch (IOException e) {
              throw new SAXException(e);
          }
      }

      /**
       * Prepares the collection of a new document described by lucene:document.
       */
      private void beginLuceneDocument(Attributes atts) throws SAXException {
          this.bodyDocumentURL = atts.getValue(LUCENE_DOCUMENT_URL_ATTRIBUTE);
          if (this.bodyDocumentURL == null) {
              throw new SAXException("<lucene:document> must have @url attribute");
          }

          this.bodyText = new StringBuffer();
          this.bodyDocument = new Document();
          this.elementStack.clear();
      }

      /**
       * Handles the end of an element according to the processing state:
       * closes the index on lucene:index, adds the collected document on
       * lucene:document, turns an element nested in a document into fields,
       * and passes on everything that is outside lucene:index.
       *
       * @throws SAXException if the index cannot be written, or if an
       *         unexpected end tag is seen directly inside lucene:index
       */
      public void endElement(String namespaceURI, String localName, String qName)
          throws SAXException {

          if (processing == STATE_IN_INDEX) {
              if (isLuceneElement(namespaceURI, localName, LUCENE_QUERY_ELEMENT)) {
                  closeIndexWriter();
                  this.processing = STATE_PASS_THROUGH;
              } else {
                  throw new SAXException("</lucene:index> was expected!");
              }
          } else if (processing == STATE_IN_DOCUMENT) {
              if (isLuceneElement(namespaceURI, localName, LUCENE_DOCUMENT_ELEMENT)) {
                  finishLuceneDocument();
                  this.processing = STATE_IN_INDEX;
              } else {
                  finishFieldElement(localName);
              }
          } else {
              // All other tags
              super.endElement(namespaceURI, localName, qName);
          }
      }

      /**
       * Optimizes and closes the index writer at the end of lucene:index.
       */
      private void closeIndexWriter() throws SAXException {
          try {
              this.writer.optimize();
              this.writer.close();
              this.writer = null;
          } catch (IOException e) {
              throw new SAXException(e);
          }
      }

      /**
       * Completes the current document with its body, URL and UID fields
       * and adds it to the index.
       */
      private void finishLuceneDocument() throws SAXException {
          this.bodyDocument.add(Field.UnStored(LuceneXMLIndexer.BODY_FIELD, this.bodyText.toString()));
          this.bodyText = null;

          this.bodyDocument.add(Field.UnIndexed(LuceneXMLIndexer.URL_FIELD, this.bodyDocumentURL));
          // store: false, index: true, tokenize: false
          this.bodyDocument.add(new Field(LuceneXMLIndexer.UID_FIELD, uid(this.bodyDocumentURL), false, true, false));
          this.bodyDocumentURL = null;

          try {
              this.writer.addDocument(this.bodyDocument);
              this.bodyDocument = null;
          } catch (IOException e) {
              throw new SAXException(e);
          }
      }

      /**
       * Turns the element on top of the element stack into fields of the
       * current document: one field per non-lucene attribute and, if the
       * element has text, one field for the text.
       */
      private void finishFieldElement(String localName) {
          IndexHelperField tos = (IndexHelperField) elementStack.pop();
          StringBuffer text = tos.getText();
          Attributes atts = tos.getAttributes();

          // Only the presence of the lucene attributes matters, not their value.
          boolean attributesToText = atts.getIndex(LUCENE_URI, LUCENE_ELEMENT_ATTR_TO_TEXT_ATTRIBUTE) != -1;
          boolean store = atts.getIndex(LUCENE_URI, LUCENE_ELEMENT_ATTR_STORE_VALUE) != -1;

          for (int i = 0; i < atts.getLength(); i++) {
              // Ignore Lucene attributes
              if (LUCENE_URI.equals(atts.getURI(i))) {
                  continue;
              }

              String attsLocalName = atts.getLocalName(i);
              String attsValue = atts.getValue(i);
              bodyDocument.add(Field.UnStored(localName + "@" + attsLocalName, attsValue));
              if (attributesToText) {
                  text.append(attsValue);
                  text.append(' ');
                  bodyText.append(attsValue);
                  bodyText.append(' ');
              }
          }

          if (text.length() > 0) {
              if (store) {
                  bodyDocument.add(Field.Text(localName, text.toString()));
              } else {
                  bodyDocument.add(Field.UnStored(localName, text.toString()));
              }
          }
      }

      /**
       * Collects character data of elements inside a document, passes on
       * character data outside lucene:index and drops everything else.
       */
      public void characters(char[] ch, int start, int length)
          throws SAXException {

          // Any non-empty chunk counts: a parser may deliver text in chunks of
          // arbitrary size, including a single character.
          if (processing == STATE_IN_DOCUMENT && length > 0 && !elementStack.isEmpty()) {
              String text = new String(ch, start, length);
              ((IndexHelperField) elementStack.peek()).append(text);
              bodyText.append(text);
              // NOTE(review): the separator is added after every chunk, so text
              // that the parser splits mid-word ends up as two words in the body.
              bodyText.append(' ');
          } else if (processing == STATE_PASS_THROUGH) {
              super.characters(ch, start, length);
          }
      }


      /**
       * Holds an element that is open inside a lucene:document: its local
       * name, a copy of its attributes and the character data collected so far.
       */
      class IndexHelperField
      {
          String localName;
          StringBuffer text;
          Attributes attributes;

          IndexHelperField(String localName, Attributes atts) {
              this.localName = localName;
              this.attributes = atts;
              this.text = new StringBuffer();
          }

          public Attributes getAttributes() {
              return attributes;
          }

          public StringBuffer getText() {
              return text;
          }

          public void append(String text) {
              this.text.append(text);
          }

          public void append(char[] str, int offset, int length) {
              this.text.append(str, offset, length);
          }
      }
  }
  
  
  

----------------------------------------------------------------------
In case of troubles, e-mail:     webmaster@xml.apache.org
To unsubscribe, e-mail:          cocoon-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: cocoon-cvs-help@xml.apache.org