You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@cocoon.apache.org by je...@apache.org on 2002/11/23 15:12:46 UTC

cvs commit: xml-cocoon2/src/java/org/apache/cocoon/components/search SimpleLuceneXMLIndexerImpl.java

jeremy      2002/11/23 06:12:46

  Modified:    src/java/org/apache/cocoon/components/search
                        SimpleLuceneXMLIndexerImpl.java
  Log:
  added configuration parameters for 'content-view-query' and 'store-fields'.
  
  Revision  Changes    Path
  1.14      +82 -4     xml-cocoon2/src/java/org/apache/cocoon/components/search/SimpleLuceneXMLIndexerImpl.java
  
  Index: SimpleLuceneXMLIndexerImpl.java
  ===================================================================
  RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/search/SimpleLuceneXMLIndexerImpl.java,v
  retrieving revision 1.13
  retrieving revision 1.14
  diff -u -r1.13 -r1.14
  --- SimpleLuceneXMLIndexerImpl.java	3 Aug 2002 02:33:35 -0000	1.13
  +++ SimpleLuceneXMLIndexerImpl.java	23 Nov 2002 14:12:46 -0000	1.14
  @@ -91,10 +91,32 @@
   import org.xml.sax.SAXParseException;
   import org.xml.sax.XMLReader;
   
  +import org.apache.cocoon.util.Tokenizer;
  +
  +
   /**
    * A simple class building lucene documents from xml content.
    *
  + *	<p>
  + *    It has two parameters that affect the way it works:
  + *	</p><p>
  + *	  <tt>&lt;store-fields/&gt;</tt> 
  + *		Sets which tags in your content are stored in Lucene as fields, 
  + *		during the indexing process. Allows them to be output with search hits.
  + *	</p><p>		
  + *		<tt>&lt;content-view-query/&gt;</tt>
  + *		Sets the view the indexer will request for indexing content.
  + *	</p><p>
  + *	Example configuration (goes in cocoon.xconf)
  + *	<pre><tt>
  + *		&lt;lucene-xml-indexer logger="core.search.lucene"&gt;
  + *			&lt;store-fields&gt;title, summary&lt;/store-fields&gt;
  + *			&lt;content-view-query&gt;cocoon-view=search&lt;/content-view-query&gt;
  + *		&lt;/lucene-xml-indexer&gt;
  + *	</tt></pre></p>
  + *
    * @author     <a href="mailto:berni_huber@a1.net">Bernhard Huber</a>
  + * @author     <a href="mailto:jeremy@apache.org">Jeremy Quinn</a>
    * @version CVS $Id$
    */
   public class SimpleLuceneXMLIndexerImpl extends AbstractLogEnabled
  @@ -108,12 +130,34 @@
       protected ComponentManager manager = null;
   
       /**
  +     * Config element name specifying the query-string appended for requesting
  +     * the content view of a URL.
  +     * <p>
  +     *  Its value is <code>content-view-query</code>.
  +     * </p>
  +     *
  +     * @since
  +     */
  +    public final static String CONTENT_VIEW_QUERY_CONFIG = "content-view-query";
  +
  +    /**
        * append this string to the url in order to get the
        * content view of the url
        *
        * @since
        */
  -    final String CONTENT_QUERY = "cocoon-view=content";
  +    
  +    final String CONTENT_VIEW_QUERY_DEFAULT = "cocoon-view=content";
  +
  +    /**
  +     * Config element name specifying the tags to be added as Stored, Untokenised, Unindexed Fields.
  +     * <p>
  +     *  Its value is <code>store-fields</code>.
  +     * </p>
  +     *
  +     * @since
  +     */
  +    public final static String FIELDTAGS_CONFIG = "store-fields";
   
       /**
        * set of allowed content types
  @@ -130,7 +174,12 @@
           allowedContentType = new HashSet();
           allowedContentType.add("text/xml");
           allowedContentType.add("text/xhtml");
  +        fieldTags = new HashSet();
       }
  +    
  +    
  +    private String contentViewQuery = CONTENT_VIEW_QUERY_DEFAULT;
  +    private HashSet fieldTags;
   
   
       /**
  @@ -140,7 +189,35 @@
        * @exception  ConfigurationException  Description of Exception
        * @since
        */
  -    public void configure(Configuration conf) throws ConfigurationException { }
  +    public void configure(Configuration configuration) throws ConfigurationException { 
  +    
  +        Configuration[] children;
  +        children = configuration.getChildren(FIELDTAGS_CONFIG);
  +        if (children != null && children.length > 0) {
  +            fieldTags = new HashSet();
  +            for (int i = 0; i < children.length; i++) {
  +                String pattern = children[i].getValue();
  + 								Tokenizer t = new Tokenizer(pattern, ", ");
  +								while (t.hasMoreTokens()) {
  +										String tokenized_pattern = t.nextToken();
  +										if (!tokenized_pattern.equals("")) {
  +											this.fieldTags.add(tokenized_pattern);
  +											if (getLogger().isDebugEnabled()) {
  +													getLogger().debug("add field: " + tokenized_pattern);
  +											}
  +										}
  +								}
  +            }
  +        } else {
  +            if (getLogger().isDebugEnabled()) {
  +                getLogger().debug("Do not add any fields");
  +            }
  +        }
  +        this.contentViewQuery = configuration.getChild(CONTENT_VIEW_QUERY_CONFIG, true).getValue(CONTENT_VIEW_QUERY_DEFAULT);
  +				if (getLogger().isDebugEnabled()) {
  +						getLogger().debug("content view: " + this.contentViewQuery);
  +				}
  +    }
   
   
       /**
  @@ -169,7 +246,7 @@
           try {
               URL contentURL = new URL(url, url.getFile()
                   + ((url.getFile().indexOf("?") == -1) ? "?" : "&")
  -                + CONTENT_QUERY);
  +                + contentViewQuery);
               URLConnection contentURLConnection = contentURL.openConnection();
               if (contentURLConnection == null) {
                   throw new ProcessingException("Can not open connection to URL "
  @@ -196,6 +273,7 @@
                   }
   
                   LuceneIndexContentHandler luceneIndexContentHandler = new LuceneIndexContentHandler();
  +                luceneIndexContentHandler.setFieldTags(fieldTags);
                   indexDocument(contentURLConnection, luceneIndexContentHandler);
                   //
                   // document is parsed
  
  
  

----------------------------------------------------------------------
In case of troubles, e-mail:     webmaster@xml.apache.org
To unsubscribe, e-mail:          cocoon-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: cocoon-cvs-help@xml.apache.org