You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lenya.apache.org by so...@apache.org on 2007/09/11 23:39:44 UTC

svn commit: r574702 [6/6] - in /lenya/branches/revolution/1.3.x: ./ lib/ src/java/org/apache/lenya/ac/ src/java/org/apache/lenya/ac/file/ src/java/org/apache/lenya/ac/impl/ src/java/org/apache/lenya/cms/ac/ src/java/org/apache/lenya/cms/ac/cocoon/ src/...

Modified: lenya/branches/revolution/1.3.x/src/java/org/apache/lenya/lucene/SearchFiles.java
URL: http://svn.apache.org/viewvc/lenya/branches/revolution/1.3.x/src/java/org/apache/lenya/lucene/SearchFiles.java?rev=574702&r1=574701&r2=574702&view=diff
==============================================================================
--- lenya/branches/revolution/1.3.x/src/java/org/apache/lenya/lucene/SearchFiles.java (original)
+++ lenya/branches/revolution/1.3.x/src/java/org/apache/lenya/lucene/SearchFiles.java Tue Sep 11 14:39:37 2007
@@ -14,15 +14,12 @@
  *  limitations under the License.
  *
  */
-
 /* $Id$  */
-
 package org.apache.lenya.lucene;
 
 import java.io.BufferedReader;
 import java.io.File;
 import java.io.InputStreamReader;
-
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.Document;
@@ -36,98 +33,78 @@
  * Command Line Interface
  */
 class SearchFiles {
-
     /**
      * main method
-     *
-     * @param args Directory of the index
+     * 
+     * @param args
+     *            Directory of the index
      */
     public static void main(String[] args) {
         if (args.length == 0) {
             System.err.println("Usage: org.apache.lenya.lucene.SearchFiles \"directory_where_index_is_located\" <word>");
             return;
         }
-
         File index_directory = new File(args[0]);
-
         if (!index_directory.exists()) {
-            System.err.println("Exception: No such directory: " +
-                index_directory.getAbsolutePath());
-
+            System.err.println("Exception: No such directory: " + index_directory.getAbsolutePath());
             return;
         }
-
-
         try {
             if (args.length > 1) {
                 Hits hits = new SearchFiles().search(args[1], index_directory);
                 return;
             }
-
             BufferedReader in = new BufferedReader(new InputStreamReader(System.in));
-
             while (true) {
                 System.out.print("Search: ");
-
                 String line = in.readLine();
-
                 if (line.length() == -1) {
                     break;
                 }
-
-		Hits hits = new SearchFiles().search(line, index_directory);
-
-                    System.out.print("\nAnother Search (y/n) ? ");
-                    line = in.readLine();
-
-                    if ((line.length() == 0) || (line.charAt(0) == 'n')) {
-                         break;
-                    }
+                Hits hits = new SearchFiles().search(line, index_directory);
+                System.out.print("\nAnother Search (y/n) ? ");
+                line = in.readLine();
+                if ((line.length() == 0) || (line.charAt(0) == 'n')) {
+                    break;
+                }
             }
-
         } catch (Exception e) {
             System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
         }
     }
-
     /**
-     *
+     * 
      */
     public Hits search(String line, File index_directory) throws Exception {
         Searcher searcher = new IndexSearcher(index_directory.getAbsolutePath());
         Analyzer analyzer = new StandardAnalyzer();
-
-        Query query = QueryParser.parse(line, "contents", analyzer);
+        // UPGRADE Lucene 1.3 -> 2.2
+        // Query query = QueryParser.parse(line, "contents", analyzer);
+        QueryParser qp = new QueryParser("contents", analyzer);
+        Query query = qp.parse(line);
         System.out.println("Searching for: " + query.toString("contents"));
-
-                Hits hits = searcher.search(query);
-                System.out.println("Total matching documents: " + hits.length());
-
-                final int HITS_PER_PAGE = 10;
-
-                for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) {
-                    int end = Math.min(hits.length(), start + HITS_PER_PAGE);
-
-                    for (int i = start; i < end; i++) {
-                        Document doc = hits.doc(i);
-                        String path = doc.get("path");
-
-                        if (path != null) {
-                            System.out.println(i + ". " + path);
-                        } else {
-                            String url = doc.get("url");
-
-                            if (url != null) {
-                                System.out.println(i + ". " + url);
-                                System.out.println("   - " + doc.get("title"));
-                            } else {
-                                System.out.println(i + ". " + "No path nor URL for this document");
-                            }
-                        }
+        Hits hits = searcher.search(query);
+        System.out.println("Total matching documents: " + hits.length());
+        final int HITS_PER_PAGE = 10;
+        for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) {
+            int end = Math.min(hits.length(), start + HITS_PER_PAGE);
+            for (int i = start; i < end; i++) {
+                Document doc = hits.doc(i);
+                String path = doc.get("path");
+                if (path != null) {
+                    System.out.println(i + ". " + path);
+                } else {
+                    String url = doc.get("url");
+                    if (url != null) {
+                        System.out.println(i + ". " + url);
+                        System.out.println("   - " + doc.get("title"));
+                    } else {
+                        System.out.println(i + ". " + "No path nor URL for this document");
                     }
-
                 }
-                searcher.close();
+            }
+        }
+        searcher.close();
         return hits;
     }
 }

Modified: lenya/branches/revolution/1.3.x/src/java/org/apache/lenya/lucene/html/HtmlDocument.java
URL: http://svn.apache.org/viewvc/lenya/branches/revolution/1.3.x/src/java/org/apache/lenya/lucene/html/HtmlDocument.java?rev=574702&r1=574701&r2=574702&view=diff
==============================================================================
--- lenya/branches/revolution/1.3.x/src/java/org/apache/lenya/lucene/html/HtmlDocument.java (original)
+++ lenya/branches/revolution/1.3.x/src/java/org/apache/lenya/lucene/html/HtmlDocument.java Tue Sep 11 14:39:37 2007
@@ -14,14 +14,11 @@
  *  limitations under the License.
  *
  */
-
 /* $Id$  */
-
 package org.apache.lenya.lucene.html;
 
-
 // Imports commented out since there is a name clash and fully
-// qualified class names will be used in the code.  Imports are
+// qualified class names will be used in the code. Imports are
 // left for ease of maintenance.
 import java.io.BufferedReader;
 import java.io.File;
@@ -30,7 +27,6 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.StringWriter;
-
 import org.apache.lucene.document.Field;
 import org.w3c.dom.Attr;
 import org.w3c.dom.Element;
@@ -39,228 +35,194 @@
 import org.w3c.dom.Text;
 import org.w3c.tidy.Tidy;
 
-
 /**
- * The <code>HtmlDocument</code> class creates a Lucene {@link org.apache.lucene.document.Document}
- * from an HTML document.
- *
+ * The <code>HtmlDocument</code> class creates a Lucene
+ * {@link org.apache.lucene.document.Document} from an HTML document.
+ * 
  * <P>
- * It does this by using JTidy package. It can take input input from {@link java.io.File} or {@link
- * java.io.InputStream}.
+ * It does this by using the JTidy package. It can take input from
+ * {@link java.io.File} or {@link java.io.InputStream}.
  * </p>
  */
 public class HtmlDocument {
     private Element rawDoc;
     private String luceneTagName = null;
     private String luceneClassValue = null;
-
     /**
      * Constructs an <code>HtmlDocument</code> from a {@link java.io.File}.
-     *
-     * @param file the <code>File</code> containing the HTML to parse
-     * @exception IOException if an I/O exception occurs
+     * 
+     * @param file
+     *            the <code>File</code> containing the HTML to parse
+     * @exception IOException
+     *                if an I/O exception occurs
      */
     public HtmlDocument(File file) throws IOException {
         Tidy tidy = new Tidy();
         tidy.setQuiet(true);
         tidy.setShowWarnings(false);
-
         org.w3c.dom.Document root = tidy.parseDOM(new FileInputStream(file), null);
         rawDoc = root.getDocumentElement();
     }
-
     /**
-     * Constructs an <code>HtmlDocument</code> from an {@link java.io.InputStream}.
-     *
-     * @param is the <code>InputStream</code> containing the HTML
-     * @exception IOException if I/O exception occurs
+     * Constructs an <code>HtmlDocument</code> from an
+     * {@link java.io.InputStream}.
+     * 
+     * @param is
+     *            the <code>InputStream</code> containing the HTML
+     * @exception IOException
+     *                if I/O exception occurs
      */
     public HtmlDocument(InputStream is) throws IOException {
         Tidy tidy = new Tidy();
         tidy.setQuiet(true);
         tidy.setShowWarnings(false);
-
         org.w3c.dom.Document root = tidy.parseDOM(is, null);
         rawDoc = root.getDocumentElement();
     }
-
     /**
-     * Creates a Lucene <code>Document</code> from an {@link java.io.InputStream}.
-     *
+     * Creates a Lucene <code>Document</code> from an
+     * {@link java.io.InputStream}.
+     * 
      * @param is
      * @return org.apache.lucene.document.Document
      * @exception IOException
      */
-    public static org.apache.lucene.document.Document getDocument(InputStream is)
-        throws IOException {
+    public static org.apache.lucene.document.Document getDocument(InputStream is) throws IOException {
         HtmlDocument htmlDoc = new HtmlDocument(is);
         org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document();
-
-        luceneDoc.add(Field.Text("title", htmlDoc.getTitle()));
-        luceneDoc.add(Field.Text("contents", htmlDoc.getBody()));
-
+        // luceneDoc.add(Field.Text("title", htmlDoc.getTitle()));
+        luceneDoc.add(new Field("title", htmlDoc.getTitle(), Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES));
+        // luceneDoc.add(Field.Text("contents", htmlDoc.getBody()));
+        luceneDoc.add(new Field("contents", htmlDoc.getBody(), Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES));
         return luceneDoc;
     }
-
     /**
      * Creates a Lucene <code>Document</code> from a {@link java.io.File}.
-     *
+     * 
      * @param file
      * @return org.apache.lucene.document.Document
      * @exception IOException
      */
-    public static org.apache.lucene.document.Document Document(File file)
-        throws IOException {
+    public static org.apache.lucene.document.Document Document(File file) throws IOException {
         HtmlDocument htmlDoc = new HtmlDocument(file);
         org.apache.lucene.document.Document luceneDoc = new org.apache.lucene.document.Document();
-
-        luceneDoc.add(Field.Text("title", htmlDoc.getTitle()));
-        luceneDoc.add(Field.Text("contents", htmlDoc.getBody()));
-
+        // luceneDoc.add(Field.Text("title", htmlDoc.getTitle()));
+        luceneDoc.add(new Field("title", htmlDoc.getTitle(), Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES));
+        // luceneDoc.add(Field.Text("contents", htmlDoc.getBody()));
+        luceneDoc.add(new Field("contents", htmlDoc.getBody(), Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES));
         String contents = null;
         BufferedReader br = new BufferedReader(new FileReader(file));
         StringWriter sw = new StringWriter();
         String line = br.readLine();
-
         while (line != null) {
             sw.write(line);
             line = br.readLine();
         }
-
         br.close();
         contents = sw.toString();
         sw.close();
-
-        luceneDoc.add(Field.UnIndexed("rawcontents", contents));
-
+        // luceneDoc.add(Field.UnIndexed("rawcontents", contents));
+        luceneDoc.add(new Field("rawcontents", contents, Field.Store.YES, Field.Index.NO));
         return luceneDoc;
     }
-
     /**
      * Gets the title attribute of the <code>HtmlDocument</code> object.
-     *
+     * 
      * @return the title value
      */
     public String getTitle() {
         if (rawDoc == null) {
             return null;
         }
-
         String title = "";
-
         NodeList nl = rawDoc.getElementsByTagName("title");
-
         if (nl.getLength() > 0) {
             Element titleElement = ((Element) nl.item(0));
             Text text = (Text) titleElement.getFirstChild();
-
             if (text != null) {
                 title = text.getData();
             }
         }
-
         return title;
     }
-
     /**
      * Gets the body text attribute of the <code>HtmlDocument</code> object.
-     *
+     * 
      * @return the body text value
      */
     public String getBody() {
         if (rawDoc == null) {
             return null;
         }
-
-        // NOTE: JTidy will insert a meta tag: <meta name="generator" content="HTML Tidy, see www.w3.org" />
-        //       This means that getLength is always greater than 0
+        // NOTE: JTidy will insert a meta tag: <meta name="generator"
+        // content="HTML Tidy, see www.w3.org" />
+        // This means that getLength is always greater than 0
         NodeList metaNL = rawDoc.getElementsByTagName("meta");
-
         for (int i = 0; i < metaNL.getLength(); i++) {
             Element metaElement = (Element) metaNL.item(i);
             Attr nameAttr = metaElement.getAttributeNode("name");
             Attr valueAttr = metaElement.getAttributeNode("value");
-
             if ((nameAttr != null) && (valueAttr != null)) {
                 if (nameAttr.getValue().equals("lucene-tag-name")) {
                     luceneTagName = valueAttr.getValue();
                 }
-
                 if (nameAttr.getValue().equals("lucene-class-value")) {
                     luceneClassValue = valueAttr.getValue();
                 }
             }
         }
-
         boolean indexByLucene = true;
-
         if ((luceneTagName != null) && (luceneClassValue != null)) {
             indexByLucene = false;
         }
-
         System.out.println("HtmlDocument.getBody(): Index By Lucene (Default): " + indexByLucene);
-
         String body = "";
         NodeList nl = rawDoc.getElementsByTagName("body");
-
         if (nl.getLength() > 0) {
             body = getBodyText(nl.item(0), indexByLucene);
         }
-
         return body;
     }
-
     /**
      * Gets the bodyText attribute of the <code>HtmlDocument</code> object.
-     *
-     * @param node a DOM Node
-     * @param indexByLucene DOCUMENT ME!
+     * 
+     * @param node
+     *            a DOM Node
+     * @param indexByLucene
+     *            DOCUMENT ME!
      * @return The bodyText value
      */
     private String getBodyText(Node node, boolean indexByLucene) {
         NodeList nl = node.getChildNodes();
         StringBuffer buffer = new StringBuffer();
-
         for (int i = 0; i < nl.getLength(); i++) {
             boolean index = indexByLucene;
             Node child = nl.item(i);
-
             switch (child.getNodeType()) {
             case Node.ELEMENT_NODE:
-
                 if ((luceneTagName != null) && (luceneClassValue != null)) {
                     if (child.getNodeName().equals(luceneTagName)) {
                         Attr attribute = ((Element) child).getAttributeNode("class");
-
                         if (attribute != null) {
                             if (attribute.getValue().equals(luceneClassValue)) {
-                                System.out.println("HtmlDocument.getBodyText(): <" + luceneTagName +
-                                    " class=\"" + luceneClassValue + "\"> found!");
+                                System.out.println("HtmlDocument.getBodyText(): <" + luceneTagName + " class=\"" + luceneClassValue + "\"> found!");
                                 index = true;
                             }
-
                         }
                     }
                 }
-
                 buffer.append(getBodyText(child, index));
-
                 if (index) {
                     buffer.append(" ");
                 }
-
                 break;
-
             case Node.TEXT_NODE:
-
                 if (indexByLucene) {
                     buffer.append(((Text) child).getData());
                 }
-
                 break;
             }
         }
-
         return buffer.toString();
     }
 }

Modified: lenya/branches/revolution/1.3.x/src/java/org/apache/lenya/lucene/index/AbstractDocumentCreator.java
URL: http://svn.apache.org/viewvc/lenya/branches/revolution/1.3.x/src/java/org/apache/lenya/lucene/index/AbstractDocumentCreator.java?rev=574702&r1=574701&r2=574702&view=diff
==============================================================================
--- lenya/branches/revolution/1.3.x/src/java/org/apache/lenya/lucene/index/AbstractDocumentCreator.java (original)
+++ lenya/branches/revolution/1.3.x/src/java/org/apache/lenya/lucene/index/AbstractDocumentCreator.java Tue Sep 11 14:39:37 2007
@@ -14,80 +14,86 @@
  *  limitations under the License.
  *
  */
-
 /* $Id$  */
-
 package org.apache.lenya.lucene.index;
 
 import java.io.File;
-
-import org.apache.lucene.document.DateField;
+import org.apache.log4j.Category;
+import org.apache.lucene.document.DateTools;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 
-import org.apache.log4j.Category;
-
 public class AbstractDocumentCreator implements DocumentCreator {
     Category log = Category.getInstance(AbstractDocumentCreator.class);
-
     /** Creates a new instance of AbstractDocumentCreator */
     public AbstractDocumentCreator() {
     }
-
     /**
      * DOCUMENT ME!
-     *
-     * @param file DOCUMENT ME!
-     * @param htdocsDumpDir DOCUMENT ME!
-     *
+     * 
+     * @param file
+     *            DOCUMENT ME!
+     * @param htdocsDumpDir
+     *            DOCUMENT ME!
+     * 
      * @return DOCUMENT ME!
-     *
-     * @throws Exception DOCUMENT ME!
+     * 
+     * @throws Exception
+     *             DOCUMENT ME!
      */
-    public Document getDocument(File file, File htdocsDumpDir)
-        throws Exception {
+    public Document getDocument(File file, File htdocsDumpDir) throws Exception {
         // make a new, empty document
         Document doc = new Document();
-
-        // Add the url as a field named "url".  Use an UnIndexed field, so
+        // Add the url as a field named "url". Use an UnIndexed field, so
         // that the url is just stored with the document, but is not searchable.
-        String requestURI = file.getPath().replace(File.separatorChar, '/').substring(htdocsDumpDir.getPath()
-                                                                                                   .length());
+        String requestURI = file.getPath().replace(File.separatorChar, '/').substring(htdocsDumpDir.getPath().length());
         if (requestURI.substring(requestURI.length() - 8).equals(".pdf.txt")) {
-            requestURI = requestURI.substring(0, requestURI.length() - 4); // Remove .txt extension from PDF text file
+            requestURI = requestURI.substring(0, requestURI.length() - 4); // Remove
+            // .txt
+            // extension
+            // from
+            // PDF
+            // text
+            // file
         }
-
-        doc.add(Field.UnIndexed("url", requestURI));
-
+        // doc.add(Field.UnIndexed("url", requestURI));
+        doc.add(new Field("url", requestURI, Field.Store.YES, Field.Index.NO));
         // Add the mime-type as a field named "mime-type"
         if (requestURI.substring(requestURI.length() - 5).equals(".html")) {
-            doc.add(Field.UnIndexed("mime-type", "text/html"));
+            // doc.add(Field.UnIndexed("mime-type", "text/html"));
+            doc.add(new Field("mime-type", "text/html", Field.Store.YES, Field.Index.NO));
         } else if (requestURI.substring(requestURI.length() - 4).equals(".txt")) {
-            doc.add(Field.UnIndexed("mime-type", "text/plain"));
+            // doc.add(Field.UnIndexed("mime-type", "text/plain"));
+            doc.add(new Field("mime-type", "text/plain", Field.Store.YES, Field.Index.NO));
         } else if (requestURI.substring(requestURI.length() - 4).equals(".pdf")) {
-            doc.add(Field.UnIndexed("mime-type", "application/pdf"));
+            // doc.add(Field.UnIndexed("mime-type", "application/pdf"));
+            doc.add(new Field("mime-type", "application/pdf", Field.Store.YES, Field.Index.NO));
         } else {
             // Don't add any mime-type field
-            //doc.add(Field.UnIndexed("mime-type", "null"));
+            // doc.add(Field.UnIndexed("mime-type", "null"));
         }
-
-        // Add the last modified date of the file a field named "modified".  Use a
-        // Keyword field, so that it's searchable, but so that no attempt is made
+        // Add the last modified date of the file as a field
+        // named "modified".
+        // Use a Keyword field, so that it's searchable,
+        // but so that no attempt is made
         // to tokenize the field into words.
-        doc.add(Field.Keyword("modified", DateField.timeToString(file.lastModified())));
-
+        // doc.add(Field.Keyword("modified",
+        // DateField.timeToString(file.lastModified())));
+        doc.add(new Field("modified", DateTools.timeToString(file.lastModified(), DateTools.Resolution.MILLISECOND), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.YES));
         // Add the id as a field, so that index can be incrementally maintained.
-	String id = IndexIterator.createID(file, htdocsDumpDir);
+        String id = IndexIterator.createID(file, htdocsDumpDir);
         log.debug(id);
-        doc.add(Field.Keyword("id", id));
-
-        // Add the uid as a field, so that index can be incrementally maintained.
+        // doc.add(Field.Keyword("id", id));
+        doc.add(new Field("id", id, Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.YES));
+        // Add the uid as a field, so that index can be incrementally
+        // maintained.
         // This field is not stored with document, it is indexed, but it is not
         // tokenized prior to indexing.
-	String uid = IndexIterator.createUID(file, htdocsDumpDir);
+        String uid = IndexIterator.createUID(file, htdocsDumpDir);
         log.debug(uid);
-        doc.add(new Field("uid", uid, false, true, false));
-
+        doc.add(new Field("uid", uid,
+        // false, true, false));
+                Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.NO));
         return doc;
     }
 }

Modified: lenya/branches/revolution/1.3.x/src/java/org/apache/lenya/lucene/index/AbstractIndexer.java
URL: http://svn.apache.org/viewvc/lenya/branches/revolution/1.3.x/src/java/org/apache/lenya/lucene/index/AbstractIndexer.java?rev=574702&r1=574701&r2=574702&view=diff
==============================================================================
--- lenya/branches/revolution/1.3.x/src/java/org/apache/lenya/lucene/index/AbstractIndexer.java (original)
+++ lenya/branches/revolution/1.3.x/src/java/org/apache/lenya/lucene/index/AbstractIndexer.java Tue Sep 11 14:39:37 2007
@@ -14,16 +14,13 @@
  *  limitations under the License.
  *
  */
-
 /* $Id$  */
-
 package org.apache.lenya.lucene.index;
 
 import java.io.File;
 import java.io.FileFilter;
 import java.io.IOException;
 import java.util.Arrays;
-
 import org.apache.log4j.Category;
 import org.apache.lenya.lucene.IndexConfiguration;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
@@ -31,34 +28,29 @@
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
-
 import org.w3c.dom.Element;
 
 /**
- * Abstract base class for indexers.
- * The factory method {@link #getDocumentCreator(String[])} is used to create a
- * DocumentCreator from the command-line arguments.
+ * Abstract base class for indexers. The factory method
+ * {@link #getDocumentCreator(String[])} is used to create a DocumentCreator
+ * from the command-line arguments.
  */
 public abstract class AbstractIndexer implements Indexer {
-    private static Category log = Category.getInstance(AbstractIndexer.class); 
-    
+    private static Category log = Category.getInstance(AbstractIndexer.class);
     private DocumentCreator documentCreator;
     private Element indexer;
     private String configFileName;
-
     /**
      * Creates a new instance of AbstractIndexer
      */
     public AbstractIndexer() {
     }
-
     /**
      * Returns the DocumentCreator of this indexer.
      */
     protected DocumentCreator getDocumentCreator() {
         return documentCreator;
     }
-
     /**
      * Initializes this indexer with command-line parameters.
      */
@@ -67,100 +59,97 @@
         this.indexer = indexer;
         this.configFileName = configFileName;
     }
-
     /**
      * DOCUMENT ME!
-     *
-     * @param element DOCUMENT ME!
-     *
+     * 
+     * @param element
+     *            DOCUMENT ME!
+     * 
      * @return DOCUMENT ME!
-     *
-     * @throws Exception DOCUMENT ME!
+     * 
+     * @throws Exception
+     *             DOCUMENT ME!
      */
     public abstract DocumentCreator createDocumentCreator(Element indexer, String configFileName) throws Exception;
-
     /**
-     * Updates the index incrementally.
-     * Walk directory hierarchy in uid order, while keeping uid iterator from
-     * existing index in sync.  Mismatches indicate one of:
+     * Updates the index incrementally. Walk directory hierarchy in uid order,
+     * while keeping uid iterator from existing index in sync. Mismatches
+     * indicate one of:
      * <ol>
-     *   <li>old documents to be deleted</li>
-     *   <li>unchanged documents, to be left alone, or</li>
-     *   <li>new documents, to be indexed.</li>
+     * <li>old documents to be deleted</li>
+     * <li>unchanged documents, to be left alone, or</li>
+     * <li>new documents, to be indexed.</li>
      * </ol>
      */
     public void updateIndex(File dumpDirectory, File index) throws Exception {
         deleteStaleDocuments(dumpDirectory, index);
         doIndex(dumpDirectory, index, false);
     }
-
     /**
      * Updates the index re one document
-     *
+     * 
      * <ol>
-     *   <li>old documents to be deleted</li>
-     *   <li>unchanged documents, to be left alone, or</li>
-     *   <li>new documents, to be indexed.</li>
+     * <li>old documents to be deleted</li>
+     * <li>unchanged documents, to be left alone, or</li>
+     * <li>new documents, to be indexed.</li>
      * </ol>
      */
     public void indexDocument(File file) throws Exception {
         IndexConfiguration config = new IndexConfiguration(configFileName);
         log.debug("File: " + file);
-
         File dumpDir = new File(config.resolvePath(config.getHTDocsDumpDir()));
         log.debug("Dump dir: " + dumpDir);
-
         File indexDir = new File(config.resolvePath(config.getIndexDir()));
         log.debug("Index dir: " + indexDir);
-
-
-	String id = IndexIterator.createID(file, dumpDir);
-
-	boolean createNewIndex = false;
+        String id = IndexIterator.createID(file, dumpDir);
+        boolean createNewIndex = false;
         if (!IndexReader.indexExists(indexDir)) {
             log.warn("Index does not exist yet: " + indexDir);
             createNewIndex = true;
         } else {
-	    // Delete from index
+            // Delete from index
             IndexReader reader = IndexReader.open(indexDir.getAbsolutePath());
-	    Term term = new Term("id", id);
+            Term term = new Term("id", id);
             log.debug(term.toString());
-            int numberOfDeletedDocuments = reader.delete(term);
+            int numberOfDeletedDocuments = reader.deleteDocuments(term);
             if (numberOfDeletedDocuments == 1) {
                 log.info("Document has been deleted: " + term);
             } else {
                 log.warn("No such document found in this index: " + term);
             }
-            //log.debug("Number of deleted documents: " + numberOfDeletedDocuments);
-            //log.debug("Current number of documents in this index: " + reader.numDocs());
+            // log.debug("Number of deleted documents: " +
+            // numberOfDeletedDocuments);
+            // log.debug("Current number of documents in this index: " +
+            // reader.numDocs());
             reader.close();
         }
-
-	// Append to index
+        // Append to index
         Document doc = getDocumentCreator().getDocument(new File(dumpDir, id), dumpDir);
         IndexWriter writer = new IndexWriter(indexDir, new StandardAnalyzer(), createNewIndex);
-        writer.maxFieldLength = 1000000;
+        writer.setMaxFieldLength(1000000);
         writer.addDocument(doc);
-        //log.debug("Document has been added: " + doc);
+        // log.debug("Document has been added: " + doc);
         log.info("Document has been added: " + id);
         writer.optimize();
         writer.close();
     }
-
     /**
      * Creates a new index.
      */
-    public void createIndex(File dumpDirectory, File index)
-        throws Exception {
+    public void createIndex(File dumpDirectory, File index) throws Exception {
         doIndex(dumpDirectory, index, true);
     }
-
     /**
      * Index files
-     *
-     * @param dumpDirectory Directory where the files to be indexed are located
-     * @param index Directory where the index shall be located
-     * @param create <strong>true</strong> means the index will be created from scratch, <strong>false</strong> means it will be indexed incrementally
+     * 
+     * @param dumpDirectory
+     *            Directory where the files to be indexed are located
+     * @param index
+     *            Directory where the index shall be located
+     * @param create
+     *            <strong>true</strong> means the index will be created from
+     *            scratch, <strong>false</strong> means it will be indexed
+     *            incrementally
      */
     public void doIndex(File dumpDirectory, File index, boolean create) {
         if (!index.isDirectory()) {
@@ -169,56 +158,46 @@
         }
         try {
             IndexWriter writer = new IndexWriter(index.getAbsolutePath(), new StandardAnalyzer(), create);
-            writer.maxFieldLength = 1000000;
-
+            writer.setMaxFieldLength(1000000);
             IndexInformation info = new IndexInformation(index.getAbsolutePath(), dumpDirectory, getFilter(indexer, configFileName), create);
-
             IndexHandler handler;
-
             if (create) {
                 handler = new CreateIndexHandler(dumpDirectory, info, writer);
             } else {
                 handler = new UpdateIndexHandler(dumpDirectory, info, writer);
             }
-
             IndexIterator iterator = new IndexIterator(index.getAbsolutePath(), getFilter(indexer, configFileName));
             iterator.addHandler(handler);
             iterator.iterate(dumpDirectory);
-
             writer.optimize();
             writer.close();
         } catch (IOException e) {
             log.error(e);
         }
     }
-
     /**
      * Delete the stale documents.
      */
-    protected void deleteStaleDocuments(File dumpDirectory, File index)
-        throws Exception {
+    protected void deleteStaleDocuments(File dumpDirectory, File index) throws Exception {
         log.debug("Deleting stale documents");
-
         IndexIterator iterator = new IndexIterator(index.getAbsolutePath(), getFilter(indexer, configFileName));
         iterator.addHandler(new DeleteHandler());
         iterator.iterate(dumpDirectory);
         log.debug("Deleting stale documents finished");
     }
-
     /**
-     * Returns the filter used to receive the indexable files. Might be overwritten by inherited class.
+     * Returns the filter used to select the indexable files. May be
+     * overridden by subclasses.
      */
     public FileFilter getFilter(Element indexer, String configFileName) {
         String[] indexableExtensions = { "html", "htm", "txt" };
         return new AbstractIndexer.DefaultIndexFilter(indexableExtensions);
     }
-
     /**
      * FileFilter used to obtain the files to index.
      */
     public class DefaultIndexFilter implements FileFilter {
         protected String[] indexableExtensions;
-
         /**
          * Default indexable extensions: html, htm, txt
          */
@@ -226,24 +205,24 @@
             String[] iE = { "html", "htm", "txt" };
             indexableExtensions = iE;
         }
-
         /**
-         *
+         * Creates a filter that accepts files with the given extensions.
          */
         public DefaultIndexFilter(String[] indexableExtensions) {
             this.indexableExtensions = indexableExtensions;
         }
-
-        /** Tests whether or not the specified abstract pathname should be
+        /**
+         * Tests whether or not the specified abstract pathname should be
          * included in a pathname list.
-         *
-         * @param  pathname  The abstract pathname to be tested
-         * @return  <code>true</code> if and only if <code>pathname</code> should be included
-         *
+         * 
+         * @param file
+         *            The abstract pathname to be tested
+         * @return <code>true</code> if and only if <code>file</code>
+         *         should be included
+         * 
          */
         public boolean accept(File file) {
             boolean accept;
-
             if (file.isDirectory()) {
                 accept = true;
             } else {
@@ -251,105 +230,95 @@
                 String extension = fileName.substring(fileName.lastIndexOf(".") + 1);
                 accept = Arrays.asList(indexableExtensions).contains(extension);
             }
-
             return accept;
         }
     }
-
     /**
-     * Deletes all stale documents up to the document representing the next file.
-     * The following documents are deleted:
+     * Deletes all stale documents up to the document representing the next
+     * file. The following documents are deleted:
      * <ul>
-     *   <li>representing files that where removed</li>
-     *   <li>representing the same file but are older than the current file</li>
+     * <li>representing files that were removed</li>
+     * <li>representing the same file but are older than the current file</li>
      * </ul>
      */
     public class DeleteHandler extends AbstractIndexIteratorHandler {
-        /** Handles a stale document.
-         *
+        /**
+         * Handles a stale document.
+         * 
          */
         public void handleStaleDocument(IndexReader reader, Term term) {
-            log.debug("deleting " +
-                IndexIterator.uid2url(term.text()));
-
+            log.debug("deleting " + IndexIterator.uid2url(term.text()));
             try {
-                int deletedDocuments = reader.delete(term);
-                log.debug("deleted " + deletedDocuments +
-                    " documents.");
+                int deletedDocuments = reader.deleteDocuments(term);
+                log.debug("deleted " + deletedDocuments + " documents.");
             } catch (IOException e) {
                 log.error(e);
             }
         }
     }
-
     /**
      * DOCUMENT ME!
      */
     public class IndexHandler extends AbstractIndexIteratorHandler {
         /**
          * Creates a new IndexHandler object.
-         *
-         * @param dumpDirectory DOCUMENT ME!
-         * @param info DOCUMENT ME!
-         * @param writer DOCUMENT ME!
+         * 
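+         * @param dumpDirectory
+         *            Directory where the files to be indexed are located
+         * @param info
+         *            Index information used to report indexing progress
+         * @param writer
+         *            Index writer that the documents are added to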
          */
         public IndexHandler(File dumpDirectory, IndexInformation info, IndexWriter writer) {
             this.info = info;
             this.dumpDirectory = dumpDirectory;
             this.writer = writer;
         }
-
         private IndexInformation info;
-
         protected IndexInformation getInformation() {
             return info;
         }
-
         private File dumpDirectory;
-
         protected File getDumpDirectory() {
             return dumpDirectory;
         }
-
         private IndexWriter writer;
-
         protected IndexWriter getWriter() {
             return writer;
         }
-
         /**
-	 * Add document to index
-	 */
+         * Add document to index
+         */
         protected void addFile(File file) {
             log.debug("adding document: " + file.getAbsolutePath());
-
             try {
                 Document doc = getDocumentCreator().getDocument(file, dumpDirectory);
                 writer.addDocument(doc);
             } catch (Exception e) {
                 log.error(e);
             }
-
             info.increase();
             log.info(info.printProgress());
         }
     }
-
     /**
      * DOCUMENT ME!
      */
     public class CreateIndexHandler extends IndexHandler {
         /**
          * Creates a new CreateIndexHandler object.
-         *
-         * @param dumpDirectory DOCUMENT ME!
-         * @param info DOCUMENT ME!
-         * @param writer DOCUMENT ME!
+         * 
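+         * @param dumpDirectory
+         *            Directory where the files to be indexed are located
+         * @param info
+         *            Index information used to report indexing progress
+         * @param writer
+         *            Index writer that the documents are added to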
          */
         public CreateIndexHandler(File dumpDirectory, IndexInformation info, IndexWriter writer) {
             super(dumpDirectory, info, writer);
         }
-
         /**
          * Handles a file. Used when creating a new index.
          */
@@ -357,22 +326,23 @@
             addFile(file);
         }
     }
-
     /**
      * DOCUMENT ME!
      */
     public class UpdateIndexHandler extends IndexHandler {
         /**
          * Creates a new UpdateIndexHandler object.
-         *
-         * @param dumpDirectory DOCUMENT ME!
-         * @param info DOCUMENT ME!
-         * @param writer DOCUMENT ME!
+         * 
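+         * @param dumpDirectory
+         *            Directory where the files to be indexed are located
+         * @param info
+         *            Index information used to report indexing progress
+         * @param writer
+         *            Index writer that the documents are added to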
          */
         public UpdateIndexHandler(File dumpDirectory, IndexInformation info, IndexWriter writer) {
             super(dumpDirectory, info, writer);
         }
-
         /**
          * Handles a new document. Used when updating the index.
          */

Modified: lenya/branches/revolution/1.3.x/src/java/org/apache/lenya/lucene/index/DefaultDocumentCreator.java
URL: http://svn.apache.org/viewvc/lenya/branches/revolution/1.3.x/src/java/org/apache/lenya/lucene/index/DefaultDocumentCreator.java?rev=574702&r1=574701&r2=574702&view=diff
==============================================================================
--- lenya/branches/revolution/1.3.x/src/java/org/apache/lenya/lucene/index/DefaultDocumentCreator.java (original)
+++ lenya/branches/revolution/1.3.x/src/java/org/apache/lenya/lucene/index/DefaultDocumentCreator.java Tue Sep 11 14:39:37 2007
@@ -14,46 +14,44 @@
  *  limitations under the License.
  *
  */
-
 /* $Id$  */
-
 package org.apache.lenya.lucene.index;
 
 import java.io.File;
-
 import org.apache.lenya.lucene.parser.HTMLParser;
 import org.apache.lenya.lucene.parser.HTMLParserFactory;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 
 public class DefaultDocumentCreator extends AbstractDocumentCreator {
-
-    /** 
+    /**
      * Creates a new instance of DefaultDocumentCreator
      */
     public DefaultDocumentCreator() {
     }
-
     /**
      * DOCUMENT ME!
-     *
-     * @param file DOCUMENT ME!
-     * @param htdocsDumpDir DOCUMENT ME!
-     *
+     * 
+     * @param file
+     *            File to parse for title, keywords and contents
+     * @param htdocsDumpDir
+     *            Dump directory, passed on to the superclass implementation
+     * 
      * @return DOCUMENT ME!
-     *
-     * @throws Exception DOCUMENT ME!
+     * 
+     * @throws Exception
+     *             DOCUMENT ME!
      */
     public Document getDocument(File file, File htdocsDumpDir) throws Exception {
         Document document = super.getDocument(file, htdocsDumpDir);
-
         HTMLParser parser = HTMLParserFactory.newInstance(file);
         parser.parse(file);
-
-        document.add(Field.Text("title", parser.getTitle()));
-        document.add(Field.Text("keywords", parser.getKeywords()));
-        document.add(Field.Text("contents", parser.getReader()));
-
+        // document.add(Field.Text("title", parser.getTitle()));
+        document.add(new Field("title", parser.getTitle(), Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES));
+        // document.add(Field.Text("keywords", parser.getKeywords()));
+        document.add(new Field("keywords", parser.getKeywords(), Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.YES));
+        // document.add(Field.Text("contents", parser.getReader()));
+        document.add(new Field("contents", parser.getReader()));
         return document;
     }
 }

Modified: lenya/branches/revolution/1.3.x/src/java/org/apache/lenya/util/CacheMap.java
URL: http://svn.apache.org/viewvc/lenya/branches/revolution/1.3.x/src/java/org/apache/lenya/util/CacheMap.java?rev=574702&r1=574701&r2=574702&view=diff
==============================================================================
--- lenya/branches/revolution/1.3.x/src/java/org/apache/lenya/util/CacheMap.java (original)
+++ lenya/branches/revolution/1.3.x/src/java/org/apache/lenya/util/CacheMap.java Tue Sep 11 14:39:37 2007
@@ -38,7 +38,7 @@
      * @param capacity The maximum number of entries.
      */
     public CacheMap(int capacity) {
-        assert capacity > -1;
+//        assert capacity > -1;
         this.capacity = capacity;
     }
     



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@lenya.apache.org
For additional commands, e-mail: commits-help@lenya.apache.org