You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@cocoon.apache.org by je...@apache.org on 2002/11/23 15:12:46 UTC
cvs commit: xml-cocoon2/src/java/org/apache/cocoon/components/search SimpleLuceneXMLIndexerImpl.java
jeremy 2002/11/23 06:12:46
Modified: src/java/org/apache/cocoon/components/search
SimpleLuceneXMLIndexerImpl.java
Log:
added configuration parameters for 'content-view-query' and 'store-fields'.
Revision Changes Path
1.14 +82 -4 xml-cocoon2/src/java/org/apache/cocoon/components/search/SimpleLuceneXMLIndexerImpl.java
Index: SimpleLuceneXMLIndexerImpl.java
===================================================================
RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/search/SimpleLuceneXMLIndexerImpl.java,v
retrieving revision 1.13
retrieving revision 1.14
diff -u -r1.13 -r1.14
--- SimpleLuceneXMLIndexerImpl.java 3 Aug 2002 02:33:35 -0000 1.13
+++ SimpleLuceneXMLIndexerImpl.java 23 Nov 2002 14:12:46 -0000 1.14
@@ -91,10 +91,32 @@
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
+import org.apache.cocoon.util.Tokenizer;
+
+
/**
* A simple class building lucene documents from xml content.
*
+ * <p>
+ * It has two parameters that affect the way it works:
+ * </p><p>
+ * <tt><store-fields/></tt>
+ * Sets which tags in your content are stored in Lucene as fields,
+ * during the indexing process. Allows them to be output with search hits.
+ * </p><p>
+ * <tt><content-view-query/></tt>
+ * Sets the view the indexer will request for indexing content.
+ * </p><p>
+ * Example configuration (goes in cocoon.xconf)
+ * <pre><tt>
+ * <lucene-xml-indexer logger="core.search.lucene">
+ * <store-fields>title, summary</store-fields>
+ * <content-view-query>cocoon-view=search</content-view-query>
+ * </lucene-xml-indexer>
+ * </tt></pre></p>
+ *
* @author <a href="mailto:berni_huber@a1.net">Bernhard Huber</a>
+ * @author <a href="mailto:jeremy@apache.org">Jeremy Quinn</a>
* @version CVS $Id$
*/
public class SimpleLuceneXMLIndexerImpl extends AbstractLogEnabled
@@ -108,12 +130,34 @@
protected ComponentManager manager = null;
/**
+ * Config element name specifying the query string appended to a URL
+ * in order to request its content view.
+ * <p>
+ * Its value is <code>content-view-query</code>.
+ * </p>
+ *
+ * @since
+ */
+ public final static String CONTENT_VIEW_QUERY_CONFIG = "content-view-query";
+
+ /**
* append this string to the url in order to get the
* content view of the url
*
* @since
*/
- final String CONTENT_QUERY = "cocoon-view=content";
+
+ final String CONTENT_VIEW_QUERY_DEFAULT = "cocoon-view=content";
+
+ /**
+ * Config element name specifying the tags to be added as Stored, Untokenised, Unindexed Fields.
+ * <p>
+ * Its value is <code>store-fields</code>.
+ * </p>
+ *
+ * @since
+ */
+ public final static String FIELDTAGS_CONFIG = "store-fields";
/**
* set of allowed content types
@@ -130,7 +174,12 @@
allowedContentType = new HashSet();
allowedContentType.add("text/xml");
allowedContentType.add("text/xhtml");
+ fieldTags = new HashSet();
}
+
+
+ private String contentViewQuery = CONTENT_VIEW_QUERY_DEFAULT;
+ private HashSet fieldTags;
/**
@@ -140,7 +189,35 @@
* @exception ConfigurationException Description of Exception
* @since
*/
- public void configure(Configuration conf) throws ConfigurationException { }
+ public void configure(Configuration configuration) throws ConfigurationException {
+
+ Configuration[] children;
+ children = configuration.getChildren(FIELDTAGS_CONFIG);
+ if (children != null && children.length > 0) {
+ fieldTags = new HashSet();
+ for (int i = 0; i < children.length; i++) {
+ String pattern = children[i].getValue();
+ Tokenizer t = new Tokenizer(pattern, ", ");
+ while (t.hasMoreTokens()) {
+ String tokenized_pattern = t.nextToken();
+ if (!tokenized_pattern.equals("")) {
+ this.fieldTags.add(tokenized_pattern);
+ if (getLogger().isDebugEnabled()) {
+ getLogger().debug("add field: " + tokenized_pattern);
+ }
+ }
+ }
+ }
+ } else {
+ if (getLogger().isDebugEnabled()) {
+ getLogger().debug("Do not add any fields");
+ }
+ }
+ this.contentViewQuery = configuration.getChild(CONTENT_VIEW_QUERY_CONFIG, true).getValue(CONTENT_VIEW_QUERY_DEFAULT);
+ if (getLogger().isDebugEnabled()) {
+ getLogger().debug("content view: " + this.contentViewQuery);
+ }
+ }
/**
@@ -169,7 +246,7 @@
try {
URL contentURL = new URL(url, url.getFile()
+ ((url.getFile().indexOf("?") == -1) ? "?" : "&")
- + CONTENT_QUERY);
+ + contentViewQuery);
URLConnection contentURLConnection = contentURL.openConnection();
if (contentURLConnection == null) {
throw new ProcessingException("Can not open connection to URL "
@@ -196,6 +273,7 @@
}
LuceneIndexContentHandler luceneIndexContentHandler = new LuceneIndexContentHandler();
+ luceneIndexContentHandler.setFieldTags(fieldTags);
indexDocument(contentURLConnection, luceneIndexContentHandler);
//
// document is parsed
----------------------------------------------------------------------
In case of troubles, e-mail: webmaster@xml.apache.org
To unsubscribe, e-mail: cocoon-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: cocoon-cvs-help@xml.apache.org