You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lenya.apache.org by mi...@apache.org on 2003/11/13 23:55:18 UTC
cvs commit: cocoon-lenya/src/java/org/apache/lenya/lucene/index AbstractIndexer.java ConfigurableDocumentCreator.java ConfigurableIndexer.java DefaultIndexer.java Index.java Indexer.java configuration2xslt.xsl
michi 2003/11/13 14:55:18
Modified: src/java/org/apache/lenya/lucene/index AbstractIndexer.java
ConfigurableDocumentCreator.java
ConfigurableIndexer.java DefaultIndexer.java
Index.java Indexer.java configuration2xslt.xsl
Log:
configurable document creator fixed
Revision Changes Path
1.8 +28 -13 cocoon-lenya/src/java/org/apache/lenya/lucene/index/AbstractIndexer.java
Index: AbstractIndexer.java
===================================================================
RCS file: /home/cvs/cocoon-lenya/src/java/org/apache/lenya/lucene/index/AbstractIndexer.java,v
retrieving revision 1.7
retrieving revision 1.8
diff -u -r1.7 -r1.8
--- AbstractIndexer.java 23 Jul 2003 13:21:27 -0000 1.7
+++ AbstractIndexer.java 13 Nov 2003 22:55:17 -0000 1.8
@@ -1,5 +1,4 @@
/*
-$Id$
<License>
============================================================================
@@ -77,7 +76,9 @@
* The factory method {@link #getDocumentCreator(String[])} is used to create a
* DocumentCreator from the command-line arguments.
*
- * @author hrt
+ * @author Andreas Hartmann
+ * @author Michael Wechner
+ * @version $Id$
*/
public abstract class AbstractIndexer implements Indexer {
private CommandLineLogger logger = new CommandLineLogger(getClass());
@@ -101,8 +102,8 @@
/**
* Initializes this indexer with command-line parameters.
*/
- public void configure(Element element) throws Exception {
- documentCreator = createDocumentCreator(element);
+ public void configure(Element indexer, String configFileName) throws Exception {
+ documentCreator = createDocumentCreator(indexer, configFileName);
}
/**
@@ -114,8 +115,7 @@
*
* @throws Exception DOCUMENT ME!
*/
- public abstract DocumentCreator createDocumentCreator(Element element)
- throws Exception;
+ public abstract DocumentCreator createDocumentCreator(Element indexer, String configFileName) throws Exception;
/**
* Updates the index incrementally.
@@ -191,21 +191,36 @@
* Returns the filter used to receive the indexable files.
*/
public FileFilter getFilter() {
- return new AbstractIndexer.DefaultIndexFilter();
+ String[] indexableExtensions = { "html", "htm", "txt" };
+ return new AbstractIndexer.DefaultIndexFilter(indexableExtensions);
}
/**
* FileFilter used to obtain the files to index.
*/
public class DefaultIndexFilter implements FileFilter {
- protected final String[] indexableExtensions = { "html", "htm", "txt" };
+ protected String[] indexableExtensions;
+
+ /**
+ * Default indexable extensions: html, htm, txt
+ */
+ public DefaultIndexFilter() {
+ String[] iE = { "html", "htm", "txt" };
+ indexableExtensions = iE;
+ }
+
+ /**
+ *
+ */
+ public DefaultIndexFilter(String[] indexableExtensions) {
+ this.indexableExtensions = indexableExtensions;
+ }
/** Tests whether or not the specified abstract pathname should be
* included in a pathname list.
*
* @param pathname The abstract pathname to be tested
- * @return <code>true</code> if and only if <code>pathname</code>
- * should be included
+ * @return <code>true</code> if and only if <code>pathname</code> should be included
*
*/
public boolean accept(File file) {
1.7 +36 -16 cocoon-lenya/src/java/org/apache/lenya/lucene/index/ConfigurableDocumentCreator.java
Index: ConfigurableDocumentCreator.java
===================================================================
RCS file: /home/cvs/cocoon-lenya/src/java/org/apache/lenya/lucene/index/ConfigurableDocumentCreator.java,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -r1.6 -r1.7
--- ConfigurableDocumentCreator.java 23 Jul 2003 13:21:27 -0000 1.6
+++ ConfigurableDocumentCreator.java 13 Nov 2003 22:55:17 -0000 1.7
@@ -1,5 +1,4 @@
/*
-$Id$
<License>
============================================================================
@@ -64,6 +63,8 @@
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.log4j.Category;
+
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
@@ -81,6 +82,7 @@
import java.lang.reflect.Method;
import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
@@ -90,10 +92,13 @@
/**
- *
- * @author hrt
+ * @author Andreas Hartmann
+ * @author Michael Wechner
+ * @version $Id$
*/
public class ConfigurableDocumentCreator extends AbstractDocumentCreator {
+ Category log = Category.getInstance(ConfigurableDocumentCreator.class);
+
public static final String LUCENE_NAMESPACE = "http://www.wyona.org/2003/lucene";
public static final String XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml";
@@ -118,7 +123,7 @@
}
/**
- * DOCUMENT ME!
+ * Transform source document into lucene document and generate a Lucene Document instance
*
* @param file DOCUMENT ME!
* @param htdocsDumpDir DOCUMENT ME!
@@ -127,25 +132,39 @@
*
* @throws Exception DOCUMENT ME!
*/
- public Document getDocument(File file, File htdocsDumpDir)
- throws Exception {
- // System.out.println(getClass().getName() + ": indexing " + file.getAbsolutePath());
+ public Document getDocument(File file, File htdocsDumpDir) throws Exception {
+ log.debug(".getDocument() : indexing " + file.getAbsolutePath());
try {
- // transform source document into lucene document
+
+ org.w3c.dom.Document sourceDocument = null;
+ DocumentBuilderFactory parserFactory = DocumentBuilderFactory.newInstance();
+ parserFactory.setValidating(false);
+ parserFactory.setNamespaceAware(true);
+ parserFactory.setIgnoringElementContentWhitespace(true);
+ DocumentBuilder mybuilder = parserFactory.newDocumentBuilder();
+ sourceDocument = mybuilder.parse(file.getAbsolutePath());
+
+
+// FIXME: What is this good for: <?xml version="1.0"?><body>...</body>
+/*
NamespaceHelper documentHelper = new NamespaceHelper(XHTML_NAMESPACE, "xhtml", "html");
org.w3c.dom.Document sourceDocument = documentHelper.getDocument();
+
Element rootNode = sourceDocument.getDocumentElement();
String bodyText = getBodyText(file);
Element bodyElement = documentHelper.createElement("body", bodyText);
rootNode.appendChild(bodyElement);
+*/
+
+
+
DOMSource documentSource = new DOMSource(sourceDocument);
Writer documentWriter = new StringWriter();
TransformerFactory tFactory = TransformerFactory.newInstance();
- Transformer documentTransformer = tFactory.newTransformer(new StreamSource(
- new StringReader(getStylesheet())));
+ Transformer documentTransformer = tFactory.newTransformer(new StreamSource(new StringReader(getStylesheet())));
documentTransformer.setOutputProperty(OutputKeys.INDENT, "yes");
documentTransformer.setOutputProperty(OutputKeys.ENCODING, "ISO-8859-1");
@@ -158,11 +177,11 @@
documentTransformer.setParameter("filename", fileName);
documentTransformer.transform(documentSource, new StreamResult(documentWriter));
- dumpLuceneDocument(file, documentWriter);
+ // DEBUG: debug lucene documents
+ //dumpLuceneDocument(file, documentWriter);
DocumentBuilder builder = DocumentHelper.createBuilder();
- org.w3c.dom.Document luceneDocument = builder.parse(new InputSource(
- new StringReader(documentWriter.toString())));
+ org.w3c.dom.Document luceneDocument = builder.parse(new InputSource(new StringReader(documentWriter.toString())));
NamespaceHelper helper = new NamespaceHelper(LUCENE_NAMESPACE, "luc", luceneDocument);
Element root = luceneDocument.getDocumentElement();
@@ -196,9 +215,10 @@
/**
* Writes the lucene XML document to a file.
*/
- protected static void dumpLuceneDocument(File file, Writer writer)
- throws IOException {
- File luceneDocumentFile = new File(file.getAbsolutePath() + ".xml");
+ protected void dumpLuceneDocument(File file, Writer writer) throws IOException {
+ log.debug(".dumpLuceneDocument(): Dump document: " + file.getAbsolutePath());
+
+ File luceneDocumentFile = new File(file.getAbsolutePath() + ".xluc");
luceneDocumentFile.createNewFile();
FileWriter fileWriter = new FileWriter(luceneDocumentFile);
1.9 +36 -76 cocoon-lenya/src/java/org/apache/lenya/lucene/index/ConfigurableIndexer.java
Index: ConfigurableIndexer.java
===================================================================
RCS file: /home/cvs/cocoon-lenya/src/java/org/apache/lenya/lucene/index/ConfigurableIndexer.java,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -r1.8 -r1.9
--- ConfigurableIndexer.java 23 Jul 2003 13:21:27 -0000 1.8
+++ ConfigurableIndexer.java 13 Nov 2003 22:55:17 -0000 1.9
@@ -1,5 +1,4 @@
/*
-$Id$
<License>
============================================================================
@@ -57,62 +56,12 @@
import org.apache.lenya.xml.DocumentHelper;
+import org.apache.log4j.Category;
+
import org.w3c.dom.Element;
-/* ====================================================================
- * The Apache Software License, Version 1.1
- *
- * Copyright (c) 2001 The Apache Software Foundation. All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * 3. The end-user documentation included with the redistribution,
- * if any, must include the following acknowledgment:
- * "This product includes software developed by the
- * Apache Software Foundation (http://www.apache.org/)."
- * Alternately, this acknowledgment may appear in the software itself,
- * if and wherever such third-party acknowledgments normally appear.
- *
- * 4. The names "Apache" and "Apache Software Foundation" and
- * "Apache Lucene" must not be used to endorse or promote products
- * derived from this software without prior written permission. For
- * written permission, please contact apache@apache.org.
- *
- * 5. Products derived from this software may not be called "Apache",
- * "Apache Lucene", nor may "Apache" appear in their name, without
- * prior written permission of the Apache Software Foundation.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
- * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
- * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- * ====================================================================
- *
- * This software consists of voluntary contributions made by many
- * individuals on behalf of the Apache Software Foundation. For more
- * information on the Apache Software Foundation, please see
- * <http://www.apache.org/>.
- */
import java.io.File;
+import java.io.FileFilter;
import java.io.StringWriter;
import java.io.Writer;
@@ -127,32 +76,33 @@
/**
- * DOCUMENT ME!
- *
- * @author $author$
- * @version $Revision$
+ * @author Andreas Hartmann
+ * @author Michael Wechner
+ * @version $Id$
*/
public class ConfigurableIndexer extends AbstractIndexer {
+ Category log = Category.getInstance(ConfigurableIndexer.class);
+
/**
- * DOCUMENT ME!
+ * Instantiate a Document Creator for creating Lucene Documents
*
- * @param element DOCUMENT ME!
+ * @param element <code>indexer</code> node
*
- * @return DOCUMENT ME!
+ * @return DocumentCreator
*
* @throws Exception DOCUMENT ME!
*/
- public DocumentCreator createDocumentCreator(Element element)
- throws Exception {
- // FIXME: ANT has a problem with Avalon, that's why we replaced Configuration by Element
-
- /*
- String configurationFileName = configuration.getChild("configuration").getAttribute("src");
- File configurationFile = new File(configurationFileName);
- String stylesheet = getStylesheet(configurationFile);
- return new ConfigurableDocumentCreator(stylesheet);
- */
- return null;
+ public DocumentCreator createDocumentCreator(Element indexer, String configFileName) throws Exception {
+ log.error(".createDocumentCreatort(): Element name: " + indexer.getNodeName());
+
+ String luceneDocConfigFileName = "cmfs-luceneDoc.xconf"; // indexer/configuration/@src
+
+ String configurationFileName = new File(configFileName).getParent() + File.separator + luceneDocConfigFileName; // configuration.getChild("configuration").getAttribute("src");
+ File configurationFile = new File(configurationFileName);
+ String stylesheet = getStylesheet(configurationFile);
+ return new ConfigurableDocumentCreator(stylesheet);
+
+ //return null;
}
public static final String CONFIGURATION_CREATOR_STYLESHEET = "org/apache/lenya/lucene/index/configuration2xslt.xsl";
@@ -160,20 +110,30 @@
/**
* Converts the configuration file to an XSLT stylesheet and returns a reader that reads this stylesheet.
*/
- protected static String getStylesheet(File configurationFile)
- throws Exception {
+ protected String getStylesheet(File configurationFile) throws Exception {
+ log.error(".getStylesheet(): Configuration file: " + configurationFile.getAbsolutePath());
+
URL configurationCreatorURL = ConfigurableIndexer.class.getClassLoader().getResource(CONFIGURATION_CREATOR_STYLESHEET);
File configurationStylesheetFile = new File(new URI(configurationCreatorURL.toString()));
org.w3c.dom.Document configurationDocument = DocumentHelper.readDocument(configurationFile);
TransformerFactory tFactory = TransformerFactory.newInstance();
- Transformer configurationTransformer = tFactory.newTransformer(new StreamSource(
- configurationStylesheetFile));
+ Transformer configurationTransformer = tFactory.newTransformer(new StreamSource(configurationStylesheetFile));
DOMSource source = new DOMSource(configurationDocument);
Writer stylesheetWriter = new StringWriter();
configurationTransformer.transform(source, new StreamResult(stylesheetWriter));
+ log.debug(".getStylesheet(): Meta Stylesheet: " + stylesheetWriter.toString());
+
return stylesheetWriter.toString();
+ }
+
+ /**
+ * Returns the filter used to receive the indexable files.
+ */
+ public FileFilter getFilter() {
+ String[] indexableExtensions = { "xml" };
+ return new AbstractIndexer.DefaultIndexFilter(indexableExtensions);
}
}
1.6 +2 -3 cocoon-lenya/src/java/org/apache/lenya/lucene/index/DefaultIndexer.java
Index: DefaultIndexer.java
===================================================================
RCS file: /home/cvs/cocoon-lenya/src/java/org/apache/lenya/lucene/index/DefaultIndexer.java,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -r1.5 -r1.6
--- DefaultIndexer.java 23 Jul 2003 13:21:27 -0000 1.5
+++ DefaultIndexer.java 13 Nov 2003 22:55:17 -0000 1.6
@@ -76,8 +76,7 @@
*
* @throws Exception DOCUMENT ME!
*/
- public DocumentCreator createDocumentCreator(Element element)
- throws Exception {
+ public DocumentCreator createDocumentCreator(Element indexer, String configFileName) throws Exception {
return new DefaultDocumentCreator();
}
}
1.9 +4 -4 cocoon-lenya/src/java/org/apache/lenya/lucene/index/Index.java
Index: Index.java
===================================================================
RCS file: /home/cvs/cocoon-lenya/src/java/org/apache/lenya/lucene/index/Index.java,v
retrieving revision 1.8
retrieving revision 1.9
diff -u -r1.8 -r1.9
--- Index.java 23 Jul 2003 13:21:27 -0000 1.8
+++ Index.java 13 Nov 2003 22:55:17 -0000 1.9
@@ -110,8 +110,8 @@
if (debug.equalsIgnoreCase("false") || debug.equalsIgnoreCase("no")) {
DebugConfiguration.setDebug(false);
- } else if (debug.equalsIgnoreCase("false") || debug.equalsIgnoreCase("no")) {
- DebugConfiguration.setDebug(false);
+ } else if (debug.equalsIgnoreCase("true") || debug.equalsIgnoreCase("yes")) {
+ DebugConfiguration.setDebug(true);
} else {
System.err.println("ERROR: <debug> must be one of 'yes', 'true', 'no', or 'false'");
}
@@ -135,7 +135,7 @@
DOMUtil du = new DOMUtil();
Document config = new DOMParserFactory().getDocument(argv[0]);
- indexer.configure(du.getElement(config.getDocumentElement(), new XPath("indexer")));
+ indexer.configure(du.getElement(config.getDocumentElement(), new XPath("indexer")), argv[0]);
if (create) {
indexer.createIndex(root, index);
1.7 +4 -4 cocoon-lenya/src/java/org/apache/lenya/lucene/index/Indexer.java
Index: Indexer.java
===================================================================
RCS file: /home/cvs/cocoon-lenya/src/java/org/apache/lenya/lucene/index/Indexer.java,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -r1.6 -r1.7
--- Indexer.java 23 Jul 2003 13:21:27 -0000 1.6
+++ Indexer.java 13 Nov 2003 22:55:17 -0000 1.7
@@ -1,5 +1,4 @@
/*
-$Id$
<License>
============================================================================
@@ -61,14 +60,15 @@
/**
- *
- * @author hrt
+ * @author Andreas Hartmann
+ * @author Michael Wechner
+ * @version $Id$
*/
public interface Indexer {
/**
* Configures this indexer.
*/
- void configure(Element element) throws Exception;
+ void configure(Element indexer, String configFileName) throws Exception;
/**
* Indexes the contents of a directory.
1.2 +4 -4 cocoon-lenya/src/java/org/apache/lenya/lucene/index/configuration2xslt.xsl
Index: configuration2xslt.xsl
===================================================================
RCS file: /home/cvs/cocoon-lenya/src/java/org/apache/lenya/lucene/index/configuration2xslt.xsl,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- configuration2xslt.xsl 22 Mar 2003 21:52:26 -0000 1.1
+++ configuration2xslt.xsl 13 Nov 2003 22:55:17 -0000 1.2
@@ -2,10 +2,10 @@
<!--
Document : configuration2xslt.xsl
- Created on : 17. März 2003, 15:03
- Author : hrt
- Description:
- Purpose of transformation follows.
+ Created on : March 17, 2003, 15:03
+ Author : Andreas Hartmann
+ Author : Michael Wechner
+ Description: Generates meta stylesheet
-->
<xsl:stylesheet version="1.0"
---------------------------------------------------------------------
To unsubscribe, e-mail: lenya-cvs-unsubscribe@cocoon.apache.org
For additional commands, e-mail: lenya-cvs-help@cocoon.apache.org