You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by ke...@apache.org on 2002/05/08 17:52:24 UTC

cvs commit: jakarta-lucene-sandbox/projects/appex/src/java/search AbstractDataSource.java DataSource.java DocumentHandler.java FSDataSource.java IllegalConfigurationException.java SearchConfiguration.java SearchIndexer.java

kelvint     02/05/08 08:52:23

  Modified:    projects/appex/src/java/search AbstractDataSource.java
                        DataSource.java DocumentHandler.java
                        FSDataSource.java
                        IllegalConfigurationException.java
                        SearchConfiguration.java SearchIndexer.java
  Log:
  Importing the classes seems to have warped the whitespace. Here's my attempt to get things back to normal.
  
  Introduced a new datasource and contenthandler mechanism. It is too major an alteration for the individual changes to be enumerated.
  
  Revision  Changes    Path
  1.2       +87 -74    jakarta-lucene-sandbox/projects/appex/src/java/search/AbstractDataSource.java
  
  Index: AbstractDataSource.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/projects/appex/src/java/search/AbstractDataSource.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- AbstractDataSource.java	4 May 2002 15:43:46 -0000	1.1
  +++ AbstractDataSource.java	8 May 2002 15:52:23 -0000	1.2
  @@ -1,75 +1,88 @@
  -package search;
  -
  -/* ====================================================================
  - * The Apache Software License, Version 1.1
  - *
  - * Copyright (c) 2001 The Apache Software Foundation.  All rights
  - * reserved.
  - *
  - * Redistribution and use in source and binary forms, with or without
  - * modification, are permitted provided that the following conditions
  - * are met:
  - *
  - * 1. Redistributions of source code must retain the above copyright
  - *    notice, this list of conditions and the following disclaimer.
  - *
  - * 2. Redistributions in binary form must reproduce the above copyright
  - *    notice, this list of conditions and the following disclaimer in
  - *    the documentation and/or other materials provided with the
  - *    distribution.
  - *
  - * 3. The end-user documentation included with the redistribution,
  - *    if any, must include the following acknowledgment:
  - *       "This product includes software developed by the
  - *        Apache Software Foundation (http://www.apache.org/)."
  - *    Alternately, this acknowledgment may appear in the software itself,
  - *    if and wherever such third-party acknowledgments normally appear.
  - *
  - * 4. The names "Apache" and "Apache Software Foundation" and
  - *    "Apache Turbine" must not be used to endorse or promote products
  - *    derived from this software without prior written permission. For
  - *    written permission, please contact apache@apache.org.
  - *
  - * 5. Products derived from this software may not be called "Apache",
  - *    "Apache Turbine", nor may "Apache" appear in their name, without
  - *    prior written permission of the Apache Software Foundation.
  - *
  - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  - * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  - * SUCH DAMAGE.
  - * ====================================================================
  - *
  - * This software consists of voluntary contributions made by many
  - * individuals on behalf of the Apache Software Foundation.  For more
  - * information on the Apache Software Foundation, please see
  - * <http://www.apache.org/>.
  - */
  -
  -/**
  - * Generic implementation of a datasource.
  - *
  - * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
  - */
  -public abstract class AbstractDataSource implements DataSource
  -{
  -    protected SearchConfiguration config;
  -
  -    public AbstractDataSource(SearchConfiguration config)
  -    {
  -        this.config = config;
  -    }
  -
  -    public SearchConfiguration getConfig()
  -    {
  -        return this.config;
  -    }
  +package search;
  +
  +/* ====================================================================
  + * The Apache Software License, Version 1.1
  + *
  + * Copyright (c) 2001 The Apache Software Foundation.  All rights
  + * reserved.
  + *
  + * Redistribution and use in source and binary forms, with or without
  + * modification, are permitted provided that the following conditions
  + * are met:
  + *
  + * 1. Redistributions of source code must retain the above copyright
  + *    notice, this list of conditions and the following disclaimer.
  + *
  + * 2. Redistributions in binary form must reproduce the above copyright
  + *    notice, this list of conditions and the following disclaimer in
  + *    the documentation and/or other materials provided with the
  + *    distribution.
  + *
  + * 3. The end-user documentation included with the redistribution,
  + *    if any, must include the following acknowledgment:
  + *       "This product includes software developed by the
  + *        Apache Software Foundation (http://www.apache.org/)."
  + *    Alternately, this acknowledgment may appear in the software itself,
  + *    if and wherever such third-party acknowledgments normally appear.
  + *
  + * 4. The names "Apache" and "Apache Software Foundation" and
  + *    "Apache Lucene" must not be used to endorse or promote products
  + *    derived from this software without prior written permission. For
  + *    written permission, please contact apache@apache.org.
  + *
  + * 5. Products derived from this software may not be called "Apache",
  + *    "Apache Lucene", nor may "Apache" appear in their name, without
  + *    prior written permission of the Apache Software Foundation.
  + *
  + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  + * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  + * SUCH DAMAGE.
  + * ====================================================================
  + *
  + * This software consists of voluntary contributions made by many
  + * individuals on behalf of the Apache Software Foundation.  For more
  + * information on the Apache Software Foundation, please see
  + * <http://www.apache.org/>.
  + */
  +
  +import java.util.Map;
  +import java.util.Set;
  +
  +/**
  + * Generic base implementation of a datasource; holds the names of the fields to index.
  + */
  +public abstract class AbstractDataSource implements DataSource
  +{
  +    protected AbstractDataSource()
  +    {
  +    }
  +
  +    protected AbstractDataSource(Map map)
  +    {
  +        loadFields(map);
  +    }
  +
  +    /**
  +     * Names of the fields to index; populated by loadFields.
  +     */
  +    protected String[] fields;
  +
  +    /**
  +     * Convenience method to load the fields to index from the keys of a Map.
  +     */
  +    protected void loadFields(Map map)
  +    {
  +        Set fieldSet = map.keySet();
  +        fields = new String[fieldSet.size()];
  +        fieldSet.toArray(fields);
  +    }
   }
  
  
  
  1.2       +99 -79    jakarta-lucene-sandbox/projects/appex/src/java/search/DataSource.java
  
  Index: DataSource.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/projects/appex/src/java/search/DataSource.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- DataSource.java	4 May 2002 15:43:46 -0000	1.1
  +++ DataSource.java	8 May 2002 15:52:23 -0000	1.2
  @@ -1,81 +1,101 @@
   package search;
  -
  -/* ====================================================================
  - * The Apache Software License, Version 1.1
  - *
  - * Copyright (c) 2001 The Apache Software Foundation.  All rights
  - * reserved.
  - *
  - * Redistribution and use in source and binary forms, with or without
  - * modification, are permitted provided that the following conditions
  - * are met:
  - *
  - * 1. Redistributions of source code must retain the above copyright
  - *    notice, this list of conditions and the following disclaimer.
  - *
  - * 2. Redistributions in binary form must reproduce the above copyright
  - *    notice, this list of conditions and the following disclaimer in
  - *    the documentation and/or other materials provided with the
  - *    distribution.
  - *
  - * 3. The end-user documentation included with the redistribution,
  - *    if any, must include the following acknowledgment:
  - *       "This product includes software developed by the
  - *        Apache Software Foundation (http://www.apache.org/)."
  - *    Alternately, this acknowledgment may appear in the software itself,
  - *    if and wherever such third-party acknowledgments normally appear.
  - *
  - * 4. The names "Apache" and "Apache Software Foundation" and
  - *    "Apache Turbine" must not be used to endorse or promote products
  - *    derived from this software without prior written permission. For
  - *    written permission, please contact apache@apache.org.
  - *
  - * 5. Products derived from this software may not be called "Apache",
  - *    "Apache Turbine", nor may "Apache" appear in their name, without
  - *    prior written permission of the Apache Software Foundation.
  - *
  - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  - * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  - * SUCH DAMAGE.
  - * ====================================================================
  - *
  - * This software consists of voluntary contributions made by many
  - * individuals on behalf of the Apache Software Foundation.  For more
  - * information on the Apache Software Foundation, please see
  - * <http://www.apache.org/>.
  - */
  -
  -import java.util.List;
  -
  -/**
  - * A datasource is any source of data (filesystem, database, URL, etc)
  - * which is indexed by SearchIndexer.
  - *
  - * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
  - */
  -public interface DataSource
  -{
  -    public static final String OBJECT_CLASS = "objectClass";
  -    public static final String OBJECT_IDENTIFIER = "objectid";
  -
  -    /**
  -     * Retrieve a list of Maps. Each map represents the
  -     * a document to be indexed. The key:value pair of the map
  -     * is the data of the document.
  -     */
  -    public List getData() throws Exception;
  -
  -    /**
  -     * Obtain the SearchConfiguration object used to configure the datasource.
  -     */
  -    public SearchConfiguration getConfig();
  +/* ====================================================================
  + * The Apache Software License, Version 1.1
  + *
  + * Copyright (c) 2001 The Apache Software Foundation.  All rights
  + * reserved.
  + *
  + * Redistribution and use in source and binary forms, with or without
  + * modification, are permitted provided that the following conditions
  + * are met:
  + *
  + * 1. Redistributions of source code must retain the above copyright
  + *    notice, this list of conditions and the following disclaimer.
  + *
  + * 2. Redistributions in binary form must reproduce the above copyright
  + *    notice, this list of conditions and the following disclaimer in
  + *    the documentation and/or other materials provided with the
  + *    distribution.
  + *
  + * 3. The end-user documentation included with the redistribution,
  + *    if any, must include the following acknowledgment:
  + *       "This product includes software developed by the
  + *        Apache Software Foundation (http://www.apache.org/)."
  + *    Alternately, this acknowledgment may appear in the software itself,
  + *    if and wherever such third-party acknowledgments normally appear.
  + *
  + * 4. The names "Apache" and "Apache Software Foundation" and
  + *    "Apache Lucene" must not be used to endorse or promote products
  + *    derived from this software without prior written permission. For
  + *    written permission, please contact apache@apache.org.
  + *
  + * 5. Products derived from this software may not be called "Apache",
  + *    "Apache Lucene", nor may "Apache" appear in their name, without
  + *    prior written permission of the Apache Software Foundation.
  + *
  + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  + * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  + * SUCH DAMAGE.
  + * ====================================================================
  + *
  + * This software consists of voluntary contributions made by many
  + * individuals on behalf of the Apache Software Foundation.  For more
  + * information on the Apache Software Foundation, please see
  + * <http://www.apache.org/>.
  + */
  +
  +import java.util.Map;
  +
  +/**
  + * A datasource is any source of data (filesystem, database, URL, etc)
  + * which is indexed by SearchIndexer.
  + */
  +public interface DataSource
  +{
  +    /**
  +     * Key in the map (located in the array returned by getData)
  +     * to represent the class name of the object being indexed.
  +     */
  +    public static final String OBJECT_CLASS = "objectClass";
  +
  +    /**
  +     * Key in the map (located in the array returned by getData)
  +     * to represent the uuid of the object being indexed.
  +     */
  +    public static final String OBJECT_IDENTIFIER = "objectId";
  +
  +    /**
  +     * Key in the map (located in the array returned by getData)
  +     * to represent nested datasources.
  +     */
  +    public static final String NESTED_DATASOURCE = "nestedDataSource";
  +
  +    /**
  +     * Key in the map (located in the array returned by getData)
  +     * to represent the id of the datasource's container. Applies to
  +     * nested datasources.
  +     */
  +    public static final String CONTAINER_IDENTIFIER = "containerId";
  +
  +    /**
  +     * Key in the map to represent the class name of the Search Result
  +     * object for this datasource (if any).
  +     */
  +    public static final String SEARCH_RESULT_CLASSNAME = "resultClassname";
  +
  +    /**
  +     * Retrieve an array of Maps. Each map represents
  +     * a document to be indexed. The key:value pairs of the map
  +     * are the metadata of the document.
  +     */
  +    public Map[] getData() throws Exception;
   }
  
  
  
  1.2       +317 -231  jakarta-lucene-sandbox/projects/appex/src/java/search/DocumentHandler.java
  
  Index: DocumentHandler.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/projects/appex/src/java/search/DocumentHandler.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- DocumentHandler.java	4 May 2002 15:43:46 -0000	1.1
  +++ DocumentHandler.java	8 May 2002 15:52:23 -0000	1.2
  @@ -1,231 +1,317 @@
  -package search;
  -
  -/* ====================================================================
  - * The Apache Software License, Version 1.1
  - *
  - * Copyright (c) 2001 The Apache Software Foundation.  All rights
  - * reserved.
  - *
  - * Redistribution and use in source and binary forms, with or without
  - * modification, are permitted provided that the following conditions
  - * are met:
  - *
  - * 1. Redistributions of source code must retain the above copyright
  - *    notice, this list of conditions and the following disclaimer.
  - *
  - * 2. Redistributions in binary form must reproduce the above copyright
  - *    notice, this list of conditions and the following disclaimer in
  - *    the documentation and/or other materials provided with the
  - *    distribution.
  - *
  - * 3. The end-user documentation included with the redistribution,
  - *    if any, must include the following acknowledgment:
  - *       "This product includes software developed by the
  - *        Apache Software Foundation (http://www.apache.org/)."
  - *    Alternately, this acknowledgment may appear in the software itself,
  - *    if and wherever such third-party acknowledgments normally appear.
  - *
  - * 4. The names "Apache" and "Apache Software Foundation" and
  - *    "Apache Turbine" must not be used to endorse or promote products
  - *    derived from this software without prior written permission. For
  - *    written permission, please contact apache@apache.org.
  - *
  - * 5. Products derived from this software may not be called "Apache",
  - *    "Apache Turbine", nor may "Apache" appear in their name, without
  - *    prior written permission of the Apache Software Foundation.
  - *
  - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  - * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  - * SUCH DAMAGE.
  - * ====================================================================
  - *
  - * This software consists of voluntary contributions made by many
  - * individuals on behalf of the Apache Software Foundation.  For more
  - * information on the Apache Software Foundation, please see
  - * <http://www.apache.org/>.
  - */
  -
  -import org.apache.log4j.Category;
  -import org.apache.lucene.document.DateField;
  -import org.apache.lucene.document.Document;
  -import org.apache.lucene.document.Field;
  -import org.apache.lucene.index.IndexWriter;
  -
  -import java.io.File;
  -import java.io.IOException;
  -import java.util.Iterator;
  -import java.util.List;
  -import java.util.Map;
  -
  -import search.util.IOUtils;
  -import search.contenthandler.FileContentHandler;
  -import search.contenthandler.ContentHandlerFactory;
  -
  -/**
  - * <p>
  - * A document is the atomic unit used for indexing purposes. It consists of
  - * metadata as well as its file contents. File contents are handled by {@link FileContentHandler}.
  - * </p>
  - * <p>
  - * DocumentHandler creates the {@link org.apache.lucene.document.Document},
  - * adds the standard fields to it, delegates to {@link FileContentHandler} to handle
  - * file contents, then adds to the {@link org.apache.lucene.index.IndexWriter}.
  - * </p>
  - * <p>
  - * The standard fields are:<br>
  - * <ul>
  - * <li>filePath : Full filesystem path to the document
  - * <li>fileName : File name of the document
  - * <li>fileLastModifiedDate : Date the file was last modified
  - * <li>fileSize : Size of the file in bytes
  - * <li>fileFormat : Extension of the file {@see com.marketingbright.core.util.IOUtils#getFileExtension}
  - * </ul>
  - * </p>
  - *
  - * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
  - */
  -public class DocumentHandler
  -{
  -    public static final String[] STANDARD_SEARCH_FIELDS =
  -            {"filePath", "fileName", "fileLastModifiedDate", "fileSize", "fileFormat"};
  -    private static Category cat = Category.getInstance(DocumentHandler.class.getName());
  -    private static Map customFields;
  -    private static final String EMPTY_STRING = "";
  -
  -    /**
  -     * Document object this DocumentHandler is handling.
  -     */
  -    private Document doc;
  -
  -    /**
  -     * Parent Document (null if none).
  -     */
  -    private Document parentDoc;
  -
  -    /**
  -     * IndexWriter to add this document to.
  -     */
  -    private IndexWriter writer;
  -
  -    public static void setCustomFields(Map aCustomFields)
  -    {
  -        customFields = aCustomFields;
  -    }
  -
  -    public DocumentHandler(IndexWriter writer)
  -    {
  -        this.writer = writer;
  -        doc = new Document();
  -    }
  -
  -    public DocumentHandler(IndexWriter writer, Document parentDoc)
  -    {
  -        this(writer);
  -        this.parentDoc = parentDoc;
  -    }
  -
  -    public void process(Map metadata) throws IOException
  -    {
  -        File contentFile = new File((String) metadata.get("filePath"));
  -
  -        // add the standard fields
  -        doc.add(Field.Keyword("filePath", contentFile.toString()));
  -        doc.add(Field.Text("fileName", contentFile.getName()));
  -        doc.add(Field.Keyword("fileLastModifiedDate", DateField.timeToString(contentFile.lastModified())));
  -        doc.add(Field.Keyword("fileSize", String.valueOf(contentFile.length())));
  -        doc.add(Field.Text("fileFormat", IOUtils.getFileExtension(contentFile)));
  -
  -        // check if this is a document from datasource where
  -        // custom fields need to be added
  -        if (parentDoc == null)
  -        {
  -            // add the custom fields
  -            for (Iterator it = customFields.keySet().iterator(); it.hasNext();)
  -            {
  -                String field = (String) it.next();
  -                String value = (String) metadata.get(field);
  -                String type = (String) customFields.get(field);
  -                addFieldToDoc(type, field, value);
  -            }
  -            // Add OBJECT_CLASS_FIELD and OBJECT_IDENTIFIER
  -            // to populate the result templates with the proper
  -            // objects
  -            doc.add(Field.UnIndexed(DataSource.OBJECT_CLASS,
  -                                    (String) metadata.get(DataSource.OBJECT_CLASS)));
  -            doc.add(Field.Text(DataSource.OBJECT_IDENTIFIER,
  -                               (String) metadata.get(DataSource.OBJECT_IDENTIFIER)));
  -        }
  -        else
  -        {
  -            for (Iterator it = customFields.keySet().iterator(); it.hasNext();)
  -            {
  -                String field = (String) it.next();
  -                String value = parentDoc.get(field);
  -                String type = (String) customFields.get(field);
  -                addFieldToDoc(type, field, value);
  -            }
  -            // Add OBJECT_CLASS_FIELD and OBJECT_IDENTIFIER
  -            // to populate the result templates with the proper
  -            // objects
  -            doc.add(Field.UnIndexed(DataSource.OBJECT_CLASS,
  -                                    parentDoc.get(DataSource.OBJECT_CLASS)));
  -            doc.add(Field.Text(DataSource.OBJECT_IDENTIFIER,
  -                               parentDoc.get(DataSource.OBJECT_IDENTIFIER)));
  -        }
  -        if (!metadata.containsKey("fileContents"))
  -        {
  -            String extension = IOUtils.getFileExtension(contentFile);
  -            FileContentHandler cHandler = ContentHandlerFactory.getContentHandler(extension);
  -            if (cHandler != null)
  -            {
  -                cHandler.parse(doc, contentFile);
  -                if (cHandler.isNested())
  -                {
  -                    List nestedData = cHandler.getNestedData();
  -                    cat.debug("Nested data list size:" + nestedData.size());
  -                    for (int i = 0; i < nestedData.size(); i++)
  -                    {
  -                        Map dataMap = (Map) nestedData.get(i);
  -                        DocumentHandler handler = new DocumentHandler(writer, doc);
  -                        handler.process(dataMap);
  -                    }
  -                }
  -            }
  -            else
  -            {
  -                cat.warn("FileContentHandler not found for " + contentFile.getName());
  -            }
  -        }
  -        else
  -            doc.add(Field.Text("fileContents", (String) metadata.get("fileContents")));
  -        addToWriter();
  -    }
  -
  -    public void addToWriter() throws IOException
  -    {
  -        writer.addDocument(this.doc);
  -    }
  -
  -    private void addFieldToDoc(String type, String field, String value)
  -    {
  -        if (value == null)
  -            value = EMPTY_STRING;
  -        if (type.equalsIgnoreCase(SearchConfiguration.TEXT_FIELD_TYPE))
  -            doc.add(Field.Text(field, value));
  -        else if (type.equalsIgnoreCase(SearchConfiguration.KEYWORD_FIELD_TYPE))
  -            doc.add(Field.Keyword(field, value));
  -        else if (type.equalsIgnoreCase(SearchConfiguration.UNINDEXED_FIELD_TYPE))
  -            doc.add(Field.UnIndexed(field, value));
  -        else if (type.equalsIgnoreCase(SearchConfiguration.UNSTORED_FIELD_TYPE))
  -            doc.add(Field.UnStored(field, value));
  -    }
  -}
  +package search;
  +
  +/* ====================================================================
  + * The Apache Software License, Version 1.1
  + *
  + * Copyright (c) 2001 The Apache Software Foundation.  All rights
  + * reserved.
  + *
  + * Redistribution and use in source and binary forms, with or without
  + * modification, are permitted provided that the following conditions
  + * are met:
  + *
  + * 1. Redistributions of source code must retain the above copyright
  + *    notice, this list of conditions and the following disclaimer.
  + *
  + * 2. Redistributions in binary form must reproduce the above copyright
  + *    notice, this list of conditions and the following disclaimer in
  + *    the documentation and/or other materials provided with the
  + *    distribution.
  + *
  + * 3. The end-user documentation included with the redistribution,
  + *    if any, must include the following acknowledgment:
  + *       "This product includes software developed by the
  + *        Apache Software Foundation (http://www.apache.org/)."
  + *    Alternately, this acknowledgment may appear in the software itself,
  + *    if and wherever such third-party acknowledgments normally appear.
  + *
  + * 4. The names "Apache" and "Apache Software Foundation" and
  + *    "Apache Lucene" must not be used to endorse or promote products
  + *    derived from this software without prior written permission. For
  + *    written permission, please contact apache@apache.org.
  + *
  + * 5. Products derived from this software may not be called "Apache",
  + *    "Apache Lucene", nor may "Apache" appear in their name, without
  + *    prior written permission of the Apache Software Foundation.
  + *
  + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  + * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  + * SUCH DAMAGE.
  + * ====================================================================
  + *
  + * This software consists of voluntary contributions made by many
  + * individuals on behalf of the Apache Software Foundation.  For more
  + * information on the Apache Software Foundation, please see
  + * <http://www.apache.org/>.
  + */
  +
  +import org.apache.log4j.Category;
  +import org.apache.lucene.document.Document;
  +import org.apache.lucene.document.Field;
  +import org.apache.lucene.index.IndexWriter;
  +import search.util.StringUtils;
  +
  +import java.io.IOException;
  +import java.io.Reader;
  +import java.util.*;
  +
  +/**
  + * <p>
  + * A document is the atomic unit used for indexing purposes. It consists of
  + * metadata as well as its file contents. File contents are handled by
  + * {@link ContentHandler}.
  + * </p>
  + * <p>
  + * DocumentHandler creates the {@link org.apache.lucene.document.Document},
  + * adds fields to it, delegates to {@link ContentHandler} to handle
  + * file contents.
  + * </p>
  + */
  +public class DocumentHandler
  +{
  +    /**
  +     * Field to retrieve all documents.
  +     */
  +    public static final String ALL_DOCUMENTS_FIELD = "AllDocuments";
  +
  +    private static Category cat = Category.getInstance(DocumentHandler.class);
  +
  +    private static boolean isDebugEnabled = cat.isDebugEnabled();
  +
  +    /**
  +     * Should parent documents include data of their children?
  +     */
  +    private static boolean parentEncapsulation = false;
  +    /**
  +     * Document object this DocumentHandler is handling.
  +     */
  +    private Document doc;
  +
  +    /**
  +     * Map of metadata for this document. Contains the field:value pair
  +     * to be added to the document.
  +     */
  +    private Map metadata;
  +
  +    /**
  +     * Map of fields. Contains field:type_of_field pair.
  +     */
  +    private Map customFields;
  +
  +    /**
  +     * IndexWriter.
  +     */
  +    private IndexWriter writer;
  +
  +    /**
  +     * A collection of documents to be added to the writer.
  +     */
  +    private List documents = new ArrayList();
  +
  +    /**
  +     * Creates a handler for a single document.
  +     *
  +     * @param metadata Map of metadata (field:value pairs) for this document.
  +     * @param customFields Map of fields (field:type_of_field pairs). May be
  +     *        null, in which case String values are added without a type.
  +     * @param writer IndexWriter the documents are added to. May be null, in
  +     *        which case process() only collects the documents.
  +     */
  +    public DocumentHandler(Map metadata,
  +                           Map customFields,
  +                           IndexWriter writer)
  +    {
  +        this.metadata = metadata;
  +        this.customFields = customFields;
  +        this.writer = writer;
  +    }
  +
  +    /**
  +     * Builds the document from the metadata map and any nested datasources.
  +     * Does nothing when the metadata carries no object identifier. When a
  +     * writer is present the document is written straight away; otherwise it
  +     * is only appended to the collected documents.
  +     */
  +    public void process() throws IOException, Exception
  +    {
  +        String identifier =
  +                (String) metadata.get(DataSource.OBJECT_IDENTIFIER);
  +        if (identifier != null)
  +        {
  +            doc = createDocument();
  +            addMapToDoc(metadata);
  +            addNestedDataSource(metadata);
  +            // Marker field shared by every document.
  +            doc.add(Field.Text(ALL_DOCUMENTS_FIELD, ALL_DOCUMENTS_FIELD));
  +            if (writer == null)
  +            {
  +                documents.add(doc);
  +            }
  +            else
  +            {
  +                addToWriter();
  +            }
  +        }
  +    }
  +
  +    /**
  +     * Returns the list of documents collected by this handler.
  +     */
  +    private List getDocuments()
  +    {
  +        return documents;
  +    }
  +
  +    /**
  +     * Creates the new, empty document that process() fills in.
  +     */
  +    private Document createDocument()
  +    {
  +        return new Document();
  +    }
  +
  +    /**
  +     * Copies the entries of a Map into the current document. String values
  +     * are added using the type configured for the field (if any), Reader
  +     * values are added as Reader-based fields, and every other value is
  +     * ignored.
  +     *
  +     * @param map Map to add.
  +     */
  +    private void addMapToDoc(Map map)
  +    {
  +        Iterator entries = map.entrySet().iterator();
  +        while (entries.hasNext())
  +        {
  +            Map.Entry entry = (Map.Entry) entries.next();
  +            String name = (String) entry.getKey();
  +            Object content = entry.getValue();
  +            if (content instanceof String)
  +            {
  +                String type = (customFields == null)
  +                        ? null
  +                        : (String) customFields.get(name);
  +                addFieldToDoc(type, name, (String) content);
  +            }
  +            else if (content instanceof Reader)
  +            {
  +                addFieldToDoc(name, (Reader) content);
  +            }
  +        }
  +    }
  +
  +    /**
  +     * Adds the nested datasources found in a map, if any. The entry may be
  +     * a single DataSource or a List of them; anything else (including a
  +     * missing entry) is ignored.
  +     *
  +     * @param map Map which contains the nested datasources.
  +     */
  +    private void addNestedDataSource(Map map) throws Exception
  +    {
  +        Object nested = map.get(DataSource.NESTED_DATASOURCE);
  +        // instanceof is false for null, so a missing entry falls through.
  +        if (nested instanceof List)
  +        {
  +            List sources = (List) nested;
  +            for (int idx = 0; idx < sources.size(); idx++)
  +            {
  +                addDataSource((DataSource) sources.get(idx));
  +            }
  +        }
  +        else if (nested instanceof DataSource)
  +        {
  +            addDataSource((DataSource) nested);
  +        }
  +    }
  +
  +    /**
  +     * Adds every data map of a datasource. A map that carries an object
  +     * identifier is processed as a document of its own and collected;
  +     * any other map is merged into the current document together with
  +     * its nested datasources.
  +     *
  +     * @param ds Datasource to add.
  +     */
  +    private void addDataSource(DataSource ds) throws Exception
  +    {
  +        Map[] maps = ds.getData();
  +        for (int idx = 0; idx < maps.length; idx++)
  +        {
  +            Map entry = maps[idx];
  +            if (!entry.containsKey(DataSource.OBJECT_IDENTIFIER))
  +            {
  +                // No identity of its own: fold the data, and the nested
  +                // datasources of that data, into the current document.
  +                addMapToDoc(entry);
  +                addNestedDataSource(entry);
  +                continue;
  +            }
  +            // Create a new document because child datasources may need
  +            // to be retrieved independently of the parent doc.
  +            DocumentHandler child = new DocumentHandler(entry, null, null);
  +            child.process();
  +            documents.addAll(child.getDocuments());
  +        }
  +    }
  +
  +    /**
  +     * Adds a String-based field to the current document. The field type
  +     * is matched case-insensitively; an unknown or null type yields a
  +     * Text field. A null value is replaced by the empty string.
  +     *
  +     * @param type Type of field.
  +     * @param field Name of field.
  +     * @param value Value of field.
  +     */
  +    private void addFieldToDoc(String type, String field, String value)
  +    {
  +        String text = (value == null) ? StringUtils.EMPTY_STRING : value;
  +        Field created;
  +        if (SearchConfiguration.KEYWORD_FIELD_TYPE.equalsIgnoreCase(type))
  +        {
  +            created = Field.Keyword(field, text);
  +        }
  +        else if (SearchConfiguration.UNINDEXED_FIELD_TYPE.equalsIgnoreCase(type))
  +        {
  +            created = Field.UnIndexed(field, text);
  +        }
  +        else if (SearchConfiguration.UNSTORED_FIELD_TYPE.equalsIgnoreCase(type))
  +        {
  +            created = Field.UnStored(field, text);
  +        }
  +        else
  +        {
  +            created = Field.Text(field, text);
  +        }
  +        doc.add(created);
  +    }
  +
  +    /**
  +     * Adds a Reader-based field to the current document as a Text field.
  +     *
  +     * @param field Name of field.
  +     * @param reader Reader supplying the field's contents.
  +     */
  +    private void addFieldToDoc(String field, Reader reader)
  +    {
  +        doc.add(Field.Text(field, reader));
  +    }
  +
  +    /**
  +     * Writes the current document, followed by every collected document,
  +     * to the IndexWriter. With parent encapsulation switched on, the
  +     * fields of the collected documents (except the container, object
  +     * class and object identifier fields) are first copied into the
  +     * current document.
  +     */
  +    private void addToWriter() throws IOException
  +    {
  +        if (parentEncapsulation)
  +        {
  +            for (int idx = 0; idx < documents.size(); idx++)
  +            {
  +                Document child = (Document) documents.get(idx);
  +                Enumeration fields = child.fields();
  +                while (fields.hasMoreElements())
  +                {
  +                    Field f = (Field) fields.nextElement();
  +                    String name = f.name();
  +                    boolean identityField =
  +                            name.equals(DataSource.CONTAINER_IDENTIFIER)
  +                            || name.equals(DataSource.OBJECT_CLASS)
  +                            || name.equals(DataSource.OBJECT_IDENTIFIER);
  +                    if (identityField)
  +                    {
  +                        continue;
  +                    }
  +                    // NOTE(review): the same Field instance ends up in both
  +                    // the child and the parent document - confirm this is
  +                    // safe for Reader-based fields.
  +                    doc.add(f);
  +                }
  +            }
  +        }
  +        // Parent first, then the collected documents in insertion order.
  +        writer.addDocument(doc);
  +        for (int idx = 0; idx < documents.size(); idx++)
  +        {
  +            Document child = (Document) documents.get(idx);
  +            writer.addDocument(child);
  +        }
  +    }
  +}
  
  
  
  1.2       +159 -109  jakarta-lucene-sandbox/projects/appex/src/java/search/FSDataSource.java
  
  Index: FSDataSource.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/projects/appex/src/java/search/FSDataSource.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- FSDataSource.java	4 May 2002 15:43:46 -0000	1.1
  +++ FSDataSource.java	8 May 2002 15:52:23 -0000	1.2
  @@ -1,109 +1,159 @@
  -package search;
  -
  -/* ====================================================================
  - * The Apache Software License, Version 1.1
  - *
  - * Copyright (c) 2001 The Apache Software Foundation.  All rights
  - * reserved.
  - *
  - * Redistribution and use in source and binary forms, with or without
  - * modification, are permitted provided that the following conditions
  - * are met:
  - *
  - * 1. Redistributions of source code must retain the above copyright
  - *    notice, this list of conditions and the following disclaimer.
  - *
  - * 2. Redistributions in binary form must reproduce the above copyright
  - *    notice, this list of conditions and the following disclaimer in
  - *    the documentation and/or other materials provided with the
  - *    distribution.
  - *
  - * 3. The end-user documentation included with the redistribution,
  - *    if any, must include the following acknowledgment:
  - *       "This product includes software developed by the
  - *        Apache Software Foundation (http://www.apache.org/)."
  - *    Alternately, this acknowledgment may appear in the software itself,
  - *    if and wherever such third-party acknowledgments normally appear.
  - *
  - * 4. The names "Apache" and "Apache Software Foundation" and
  - *    "Apache Turbine" must not be used to endorse or promote products
  - *    derived from this software without prior written permission. For
  - *    written permission, please contact apache@apache.org.
  - *
  - * 5. Products derived from this software may not be called "Apache",
  - *    "Apache Turbine", nor may "Apache" appear in their name, without
  - *    prior written permission of the Apache Software Foundation.
  - *
  - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  - * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  - * SUCH DAMAGE.
  - * ====================================================================
  - *
  - * This software consists of voluntary contributions made by many
  - * individuals on behalf of the Apache Software Foundation.  For more
  - * information on the Apache Software Foundation, please see
  - * <http://www.apache.org/>.
  - */
  -
  -import org.apache.lucene.document.DateField;
  -import org.apache.lucene.document.Field;
  -
  -import java.io.File;
  -import java.util.ArrayList;
  -import java.util.HashMap;
  -import java.util.List;
  -import java.util.Map;
  -
  -/**
  - * A filesystem-based datasource.
  - *
  - * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
  - */
  -public class FSDataSource extends AbstractDataSource
  -{
  -    private File targetDirectory;
  -
  -    public FSDataSource(SearchConfiguration config)
  -    {
  -        super(config);
  -    }
  -
  -    public List getData()
  -    {
  -        List returnData = new ArrayList();
  -        loadDataFromFiles(targetDirectory, returnData);
  -        return returnData;
  -    }
  -
  -    public void setTargetDirectory(File targetDirectory)
  -    {
  -        this.targetDirectory = targetDirectory;
  -    }
  -
  -    private void loadDataFromFiles(File f, List list)
  -    {
  -        if (f.isDirectory())
  -        {
  -            File[] directoryTree = f.listFiles();
  -            for (int i = 0; i < directoryTree.length; i++)
  -            {
  -                loadDataFromFiles(directoryTree[i], list);
  -            }
  -        }
  -        else
  -        {
  -            Map dataMap = new HashMap();
  -            dataMap.put("filePath", f.getPath());
  -            list.add(dataMap);
  -        }
  -    }
  -}
  +package search;
  +
  +/* ====================================================================
  + * The Apache Software License, Version 1.1
  + *
  + * Copyright (c) 2001 The Apache Software Foundation.  All rights
  + * reserved.
  + *
  + * Redistribution and use in source and binary forms, with or without
  + * modification, are permitted provided that the following conditions
  + * are met:
  + *
  + * 1. Redistributions of source code must retain the above copyright
  + *    notice, this list of conditions and the following disclaimer.
  + *
  + * 2. Redistributions in binary form must reproduce the above copyright
  + *    notice, this list of conditions and the following disclaimer in
  + *    the documentation and/or other materials provided with the
  + *    distribution.
  + *
  + * 3. The end-user documentation included with the redistribution,
  + *    if any, must include the following acknowledgment:
  + *       "This product includes software developed by the
  + *        Apache Software Foundation (http://www.apache.org/)."
  + *    Alternately, this acknowledgment may appear in the software itself,
  + *    if and wherever such third-party acknowledgments normally appear.
  + *
  + * 4. The names "Apache" and "Apache Software Foundation" and
  + *    "Apache Lucene" must not be used to endorse or promote products
  + *    derived from this software without prior written permission. For
  + *    written permission, please contact apache@apache.org.
  + *
  + * 5. Products derived from this software may not be called "Apache",
  + *    "Apache Lucene", nor may "Apache" appear in their name, without
  + *    prior written permission of the Apache Software Foundation.
  + *
  + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  + * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  + * SUCH DAMAGE.
  + * ====================================================================
  + *
  + * This software consists of voluntary contributions made by many
  + * individuals on behalf of the Apache Software Foundation.  For more
  + * information on the Apache Software Foundation, please see
  + * <http://www.apache.org/>.
  + */
  +
  +import org.apache.lucene.document.DateField;
  +import search.contenthandler.FileContentHandler;
  +import search.contenthandler.FileContentHandlerFactory;
  +import search.util.IOUtils;
  +
  +import java.io.File;
  +import java.io.Reader;
  +import java.util.ArrayList;
  +import java.util.HashMap;
  +import java.util.List;
  +import java.util.Map;
  +
  +/**
  + * A filesystem-based datasource.
  + *
  + * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
  + */
  +public class FSDataSource extends AbstractDataSource
  +{
  +    public static final String FILE_PATH_FIELD = "filePath";
  +    public static final String FILE_NAME_FIELD = "fileName";
  +    public static final String FILE_SIZE_FIELD = "fileSize";
  +    public static final String FILE_FORMAT_FIELD = "fileFormat";
  +    public static final String FILE_CONTENTS_FIELD = "fileContents";
  +    public static final String FILE_LAST_MODIFIED_DATE_FIELD = "fileLastModifiedDate";
  +
  +    private File targetFileOrDir;
  +
  +    /**
  +     * Creates a datasource for the file or directory at the given path.
  +     *
  +     * @param targetFileOrDirStr Path of the file or directory.
  +     */
  +    public FSDataSource(String targetFileOrDirStr)
  +    {
  +        this(new File(targetFileOrDirStr));
  +    }
  +
  +    /**
  +     * Creates a datasource for the given file or directory.
  +     *
  +     * @param targetFileOrDir File or directory to read data from.
  +     */
  +    public FSDataSource(File targetFileOrDir)
  +    {
  +        // NOTE(review): calls a public, overridable method from the
  +        // constructor - confirm no subclass overrides it.
  +        setTargetDirectory(targetFileOrDir);
  +    }
  +
  +    /**
  +     * Returns one data map per file found at, or below, the target file
  +     * or directory.
  +     *
  +     * @return Array of data maps; empty when no file was found.
  +     */
  +    public Map[] getData()
  +    {
  +        List collected = new ArrayList();
  +        loadDataFromFiles(targetFileOrDir, collected);
  +        return (Map[]) collected.toArray(new Map[collected.size()]);
  +    }
  +
  +    /**
  +     * Sets the file or directory that getData() reads from.
  +     *
  +     * @param targetFileOrDir File or directory to read data from.
  +     */
  +    public void setTargetDirectory(File targetFileOrDir)
  +    {
  +        this.targetFileOrDir = targetFileOrDir;
  +    }
  +
  +    /**
  +     * Recursively collects one data map per file. Directories are
  +     * descended into; every other path yields a map holding the file's
  +     * path, name, last-modified date, size and format, plus whatever
  +     * addFileContents contributes.
  +     *
  +     * @param f File or directory to load.
  +     * @param list List the data maps are appended to.
  +     */
  +    private void loadDataFromFiles(File f, List list)
  +    {
  +        if (f.isDirectory())
  +        {
  +            File[] directoryTree = f.listFiles();
  +            // listFiles() returns null when the directory cannot be read
  +            // (e.g. an I/O error); skip it rather than fail with a
  +            // NullPointerException.
  +            if (directoryTree == null)
  +            {
  +                return;
  +            }
  +            for (int i = 0; i < directoryTree.length; i++)
  +            {
  +                loadDataFromFiles(directoryTree[i], list);
  +            }
  +        }
  +        else
  +        {
  +            Map dataMap = new HashMap();
  +            dataMap.put(FILE_PATH_FIELD, f.getPath());
  +            dataMap.put(FILE_NAME_FIELD, f.getName());
  +            dataMap.put(FILE_LAST_MODIFIED_DATE_FIELD,
  +                        DateField.timeToString(f.lastModified()));
  +            dataMap.put(FILE_SIZE_FIELD, String.valueOf(f.length()));
  +            dataMap.put(FILE_FORMAT_FIELD,
  +                        IOUtils.getFileExtension(f));
  +            addFileContents(f, dataMap);
  +            list.add(dataMap);
  +        }
  +    }
  +
  +    /**
  +     * Adds the contents of a file to its data map: a Reader over the
  +     * file contents when the content handler reports them readable, and
  +     * the nested datasource when the handler reports nested data. Files
  +     * without a content handler contribute nothing.
  +     *
  +     * @param targetFile File whose contents are added.
  +     * @param dataMap Data map of the file.
  +     */
  +    private void addFileContents(File targetFile, Map dataMap)
  +    {
  +        FileContentHandler handler =
  +                FileContentHandlerFactory.getContentHandler(targetFile);
  +        if (handler == null)
  +        {
  +            //cat.warn("ContentHandler not found for " + targetFile.getName());
  +            return;
  +        }
  +        if (handler.fileContentIsReadable())
  +        {
  +            Reader contents = handler.getReader();
  +            if (contents != null)
  +            {
  +                dataMap.put(FILE_CONTENTS_FIELD, contents);
  +            }
  +        }
  +        if (handler.containsNestedData())
  +        {
  +            dataMap.put(NESTED_DATASOURCE, handler.getNestedDataSource());
  +        }
  +    }
  +}
  
  
  
  1.2       +2 -2      jakarta-lucene-sandbox/projects/appex/src/java/search/IllegalConfigurationException.java
  
  Index: IllegalConfigurationException.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/projects/appex/src/java/search/IllegalConfigurationException.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- IllegalConfigurationException.java	4 May 2002 15:43:46 -0000	1.1
  +++ IllegalConfigurationException.java	8 May 2002 15:52:23 -0000	1.2
  @@ -26,12 +26,12 @@
    *    if and wherever such third-party acknowledgments normally appear.
    *
    * 4. The names "Apache" and "Apache Software Foundation" and
  - *    "Apache Turbine" must not be used to endorse or promote products
  + *    "Apache Lucene" must not be used to endorse or promote products
    *    derived from this software without prior written permission. For
    *    written permission, please contact apache@apache.org.
    *
    * 5. Products derived from this software may not be called "Apache",
  - *    "Apache Turbine", nor may "Apache" appear in their name, without
  + *    "Apache Lucene", nor may "Apache" appear in their name, without
    *    prior written permission of the Apache Software Foundation.
    *
    * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  
  
  
  1.2       +257 -257  jakarta-lucene-sandbox/projects/appex/src/java/search/SearchConfiguration.java
  
  Index: SearchConfiguration.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/projects/appex/src/java/search/SearchConfiguration.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- SearchConfiguration.java	4 May 2002 15:43:46 -0000	1.1
  +++ SearchConfiguration.java	8 May 2002 15:52:23 -0000	1.2
  @@ -1,257 +1,257 @@
  -package search;
  -
  -/* ====================================================================
  - * The Apache Software License, Version 1.1
  - *
  - * Copyright (c) 2001 The Apache Software Foundation.  All rights
  - * reserved.
  - *
  - * Redistribution and use in source and binary forms, with or without
  - * modification, are permitted provided that the following conditions
  - * are met:
  - *
  - * 1. Redistributions of source code must retain the above copyright
  - *    notice, this list of conditions and the following disclaimer.
  - *
  - * 2. Redistributions in binary form must reproduce the above copyright
  - *    notice, this list of conditions and the following disclaimer in
  - *    the documentation and/or other materials provided with the
  - *    distribution.
  - *
  - * 3. The end-user documentation included with the redistribution,
  - *    if any, must include the following acknowledgment:
  - *       "This product includes software developed by the
  - *        Apache Software Foundation (http://www.apache.org/)."
  - *    Alternately, this acknowledgment may appear in the software itself,
  - *    if and wherever such third-party acknowledgments normally appear.
  - *
  - * 4. The names "Apache" and "Apache Software Foundation" and
  - *    "Apache Turbine" must not be used to endorse or promote products
  - *    derived from this software without prior written permission. For
  - *    written permission, please contact apache@apache.org.
  - *
  - * 5. Products derived from this software may not be called "Apache",
  - *    "Apache Turbine", nor may "Apache" appear in their name, without
  - *    prior written permission of the Apache Software Foundation.
  - *
  - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  - * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  - * SUCH DAMAGE.
  - * ====================================================================
  - *
  - * This software consists of voluntary contributions made by many
  - * individuals on behalf of the Apache Software Foundation.  For more
  - * information on the Apache Software Foundation, please see
  - * <http://www.apache.org/>.
  - */
  -
  -import org.apache.log4j.Category;
  -import org.jdom.Document;
  -import org.jdom.Element;
  -import org.jdom.input.SAXBuilder;
  -import search.util.DataUnformatFilter;
  -import search.contenthandler.ContentHandlerFactory;
  -
  -import java.util.HashMap;
  -import java.util.List;
  -import java.util.Map;
  -import java.util.StringTokenizer;
  -
  -/**
  - * Configures the indexing process using an XML file.
  - *
  - * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
  - */
  -public class SearchConfiguration
  -{
  -    public static final String TEXT_FIELD_TYPE = "text";
  -    public static final String KEYWORD_FIELD_TYPE = "keyword";
  -    public static final String UNINDEXED_FIELD_TYPE = "unindexed";
  -    public static final String UNSTORED_FIELD_TYPE = "unstored";
  -
  -    /** Log4j category.
  -     */
  -    static Category cat = Category.getInstance(SearchConfiguration.class.getName());
  -
  -    /**
  -     * Key in the config file to declare content handlers.
  -     */
  -    private static final String CONTENT_HANDLER_KEY = "Search.ContentHandlers";
  -
  -    /**
  -     * Key in the config file to declare custom fields.
  -     */
  -    private static final String FIELD_KEY = "Search.Fields";
  -
  -    /**
  -     * Map of content handlers.
  -     */
  -    private Map contentHandlers = new HashMap();
  -
  -    /**
  -     * Map of (non-standard) custom fields to index.
  -     */
  -    private Map customFields = new HashMap();
  -
  -    /**
  -     * Document object which represents the xml configuration file.
  -     */
  -    private Document doc;
  -
  -    /**
  -     * Creates a new SearchConfiguration.
  -     *
  -     * @param configFile Name of the xml configuration file.
  -     */
  -    public SearchConfiguration(String configFile) throws IllegalConfigurationException
  -    {
  -        try
  -        {
  -            SAXBuilder builder = new SAXBuilder();
  -            DataUnformatFilter format = new DataUnformatFilter();
  -            builder.setXMLFilter(format);
  -            doc = builder.build(configFile);
  -        }
  -        catch (Exception e)
  -        {
  -            cat.error("Error creating XML parser:" + e.getMessage(), e);
  -        }
  -        loadContentHandlers();
  -        loadCustomFields();
  -    }
  -
  -    public Map getContentHandlers()
  -    {
  -        return this.contentHandlers;
  -    }
  -
  -    public Map getCustomFields()
  -    {
  -        return this.customFields;
  -    }
  -
  -    /**
  -     * Loads the content handlers.
  -     */
  -    protected void loadContentHandlers() throws IllegalConfigurationException
  -    {
  -        String[] extensions = getChildPropertyAttributeValues(CONTENT_HANDLER_KEY, "extension");
  -        String[] handlers = getChildPropertyAttributeValues(CONTENT_HANDLER_KEY, "handler");
  -        if (extensions.length != handlers.length)
  -            throw new IllegalConfigurationException(
  -                    "Illegal configuration of Search Content Handlers!");
  -        for (int i = 0; i < extensions.length; i++)
  -        {
  -            contentHandlers.put(extensions[i], generateObject(handlers[i]));
  -        }
  -        String[] defaultExtension = getChildPropertyAttributeValues(CONTENT_HANDLER_KEY, "default");
  -        for (int i = 0; i < defaultExtension.length; i++)
  -        {
  -            if (defaultExtension[i] != null && defaultExtension[i].equals("true"))
  -            {
  -                contentHandlers.put(ContentHandlerFactory.DEFAULT_HANDLER_KEY
  -                                    , generateObject(handlers[i]));
  -            }
  -        }
  -    }
  -
  -    /**
  -     * Loads the custom fields to index.
  -     */
  -    protected void loadCustomFields() throws IllegalConfigurationException
  -    {
  -        String[] fields = getChildPropertyAttributeValues(FIELD_KEY, "name");
  -        String[] fieldtypes = getChildPropertyAttributeValues(FIELD_KEY, "type");
  -        if (fields.length != fieldtypes.length)
  -            throw new IllegalConfigurationException(
  -                    "Illegal configuration of custom search fields!");
  -        for (int i = 0; i < fields.length; i++)
  -        {
  -            customFields.put(fields[i], fieldtypes[i]);
  -        }
  -    }
  -
  -    /**
  -     * Return attribute values for all child nodes.
  -     */
  -    private String[] getChildPropertyAttributeValues(String parent,
  -                                                     String attributeName)
  -    {
  -        String[] nodeName = parseNodeName(parent);
  -        Element element = doc.getRootElement();
  -        for (int i = 0; i < nodeName.length; i++)
  -        {
  -            element = element.getChild(nodeName[i]);
  -            if (element == null)
  -            {
  -                return new String[]{};
  -            }
  -        }
  -        List children = element.getChildren();
  -        int childCount = children.size();
  -        String[] childrenAttributeValue = new String[childCount];
  -        for (int i = 0; i < childCount; i++)
  -        {
  -            childrenAttributeValue[i] =
  -                    ((Element) children.get(i)).getAttributeValue(attributeName);
  -        }
  -        return childrenAttributeValue;
  -    }
  -
  -    /**
  -     * Node names are in the form "x.y.z". Returns a String array
  -     * representation of the node elements.
  -     */
  -    private String[] parseNodeName(String nodeName)
  -    {
  -        StringTokenizer st = new StringTokenizer(nodeName, ".");
  -        String[] nodeElements = new String[st.countTokens()];
  -        int i = 0;
  -        while (st.hasMoreTokens())
  -        {
  -            nodeElements[i] = st.nextToken();
  -            ++i;
  -        }
  -        return nodeElements;
  -    }
  -
  -    /**
  -     * Utility method to return an object based on its class name.
  -     * The object needs to have a constructor which accepts no parameters.
  -     *
  -     * @param className  Class name of object to be generated
  -     * @return Object
  -     */
  -    private static Object generateObject(String className)
  -    {
  -        Object o = null;
  -        try
  -        {
  -            Class c = Class.forName(className);
  -            o = c.newInstance();
  -        }
  -        catch (ClassNotFoundException cnfe)
  -        {
  -            cat.error(cnfe.getMessage() + " No class named '" + className + "' was found.", cnfe);
  -        }
  -        catch (InstantiationException ie)
  -        {
  -            cat.error(ie.getMessage() + " Class named '" + className + "' could not be  instantiated.", ie);
  -        }
  -        catch (IllegalAccessException iae)
  -        {
  -            cat.error(iae.getMessage() + " No access to class named '" + className + "'.", iae);
  -        }
  -        return o;
  -    }
  -
  -}
  +package search;
  +
  +/* ====================================================================
  + * The Apache Software License, Version 1.1
  + *
  + * Copyright (c) 2001 The Apache Software Foundation.  All rights
  + * reserved.
  + *
  + * Redistribution and use in source and binary forms, with or without
  + * modification, are permitted provided that the following conditions
  + * are met:
  + *
  + * 1. Redistributions of source code must retain the above copyright
  + *    notice, this list of conditions and the following disclaimer.
  + *
  + * 2. Redistributions in binary form must reproduce the above copyright
  + *    notice, this list of conditions and the following disclaimer in
  + *    the documentation and/or other materials provided with the
  + *    distribution.
  + *
  + * 3. The end-user documentation included with the redistribution,
  + *    if any, must include the following acknowledgment:
  + *       "This product includes software developed by the
  + *        Apache Software Foundation (http://www.apache.org/)."
  + *    Alternately, this acknowledgment may appear in the software itself,
  + *    if and wherever such third-party acknowledgments normally appear.
  + *
  + * 4. The names "Apache" and "Apache Software Foundation" and
  + *    "Apache Lucene" must not be used to endorse or promote products
  + *    derived from this software without prior written permission. For
  + *    written permission, please contact apache@apache.org.
  + *
  + * 5. Products derived from this software may not be called "Apache",
  + *    "Apache Lucene", nor may "Apache" appear in their name, without
  + *    prior written permission of the Apache Software Foundation.
  + *
  + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  + * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  + * SUCH DAMAGE.
  + * ====================================================================
  + *
  + * This software consists of voluntary contributions made by many
  + * individuals on behalf of the Apache Software Foundation.  For more
  + * information on the Apache Software Foundation, please see
  + * <http://www.apache.org/>.
  + */
  +
  +import org.apache.log4j.Category;
  +import org.jdom.Document;
  +import org.jdom.Element;
  +import org.jdom.input.SAXBuilder;
  +import search.util.DataUnformatFilter;
  +import search.contenthandler.FileContentHandlerFactory;
  +
  +import java.util.HashMap;
  +import java.util.List;
  +import java.util.Map;
  +import java.util.StringTokenizer;
  +
  +/**
  + * Configures the indexing process using an XML file.
  + *
  + * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
  + */
  +public class SearchConfiguration
  +{
  +    public static final String TEXT_FIELD_TYPE = "text";
  +    public static final String KEYWORD_FIELD_TYPE = "keyword";
  +    public static final String UNINDEXED_FIELD_TYPE = "unindexed";
  +    public static final String UNSTORED_FIELD_TYPE = "unstored";
  +
  +    /** Log4j category.
  +     */
  +    static Category cat = Category.getInstance(SearchConfiguration.class.getName());
  +
  +    /**
  +     * Key in the config file to declare content handlers.
  +     */
  +    private static final String CONTENT_HANDLER_KEY = "Search.ContentHandlers";
  +
  +    /**
  +     * Key in the config file to declare custom fields.
  +     */
  +    private static final String FIELD_KEY = "Search.Fields";
  +
  +    /**
  +     * Map of content handlers.
  +     */
  +    private Map contentHandlers = new HashMap();
  +
  +    /**
  +     * Map of (non-standard) custom fields to index.
  +     */
  +    private Map customFields = new HashMap();
  +
  +    /**
  +     * Document object which represents the xml configuration file.
  +     */
  +    private Document doc;
  +
  +    /**
  +     * Creates a new SearchConfiguration.
  +     *
  +     * @param configFile Name of the xml configuration file.
  +     */
  +    public SearchConfiguration(String configFile) throws IllegalConfigurationException
  +    {
  +        try
  +        {
  +            SAXBuilder builder = new SAXBuilder();
  +            DataUnformatFilter format = new DataUnformatFilter();
  +            builder.setXMLFilter(format);
  +            doc = builder.build(configFile);
  +        }
  +        catch (Exception e)
  +        {
  +            cat.error("Error creating XML parser:" + e.getMessage(), e);
  +        }
  +        loadContentHandlers();
  +        loadCustomFields();
  +    }
  +
  +    public Map getContentHandlers()
  +    {
  +        return this.contentHandlers;
  +    }
  +
  +    public Map getCustomFields()
  +    {
  +        return this.customFields;
  +    }
  +
  +    /**
  +     * Loads the content handlers.
  +     */
  +    protected void loadContentHandlers() throws IllegalConfigurationException
  +    {
  +        String[] extensions = getChildPropertyAttributeValues(CONTENT_HANDLER_KEY, "extension");
  +        String[] handlers = getChildPropertyAttributeValues(CONTENT_HANDLER_KEY, "handler");
  +        if (extensions.length != handlers.length)
  +            throw new IllegalConfigurationException(
  +                    "Illegal configuration of Search Content Handlers!");
  +        for (int i = 0; i < extensions.length; i++)
  +        {
  +            contentHandlers.put(extensions[i], generateObject(handlers[i]));
  +        }
  +        String[] defaultExtension = getChildPropertyAttributeValues(CONTENT_HANDLER_KEY, "default");
  +        for (int i = 0; i < defaultExtension.length; i++)
  +        {
  +            if (defaultExtension[i] != null && defaultExtension[i].equals("true"))
  +            {
  +                contentHandlers.put(FileContentHandlerFactory.DEFAULT_HANDLER_KEY
  +                                    , generateObject(handlers[i]));
  +            }
  +        }
  +    }
  +
  +    /**
  +     * Loads the custom fields to index.
  +     */
  +    protected void loadCustomFields() throws IllegalConfigurationException
  +    {
  +        String[] fields = getChildPropertyAttributeValues(FIELD_KEY, "name");
  +        String[] fieldtypes = getChildPropertyAttributeValues(FIELD_KEY, "type");
  +        if (fields.length != fieldtypes.length)
  +            throw new IllegalConfigurationException(
  +                    "Illegal configuration of custom search fields!");
  +        for (int i = 0; i < fields.length; i++)
  +        {
  +            customFields.put(fields[i], fieldtypes[i]);
  +        }
  +    }
  +
  +    /**
  +     * Returns the named attribute's value for each child of the node at the dotted path "parent" (empty array if the path is missing).
  +     */
  +    private String[] getChildPropertyAttributeValues(String parent,
  +                                                     String attributeName)
  +    {
  +        String[] nodeName = parseNodeName(parent);
  +        Element element = doc.getRootElement();
  +        for (int i = 0; i < nodeName.length; i++)
  +        {
  +            element = element.getChild(nodeName[i]);
  +            if (element == null)
  +            {
  +                return new String[]{};
  +            }
  +        }
  +        List children = element.getChildren();
  +        int childCount = children.size();
  +        String[] childrenAttributeValue = new String[childCount];
  +        for (int i = 0; i < childCount; i++)
  +        {
  +            childrenAttributeValue[i] =
  +                    ((Element) children.get(i)).getAttributeValue(attributeName);
  +        }
  +        return childrenAttributeValue;
  +    }
  +
  +    /**
  +     * Node names are in the form "x.y.z". Returns a String array
  +     * representation of the node elements.
  +     */
  +    private String[] parseNodeName(String nodeName)
  +    {
  +        StringTokenizer st = new StringTokenizer(nodeName, ".");
  +        String[] nodeElements = new String[st.countTokens()];
  +        int i = 0;
  +        while (st.hasMoreTokens())
  +        {
  +            nodeElements[i] = st.nextToken();
  +            ++i;
  +        }
  +        return nodeElements;
  +    }
  +
  +    /**
  +     * Utility method to return an object based on its class name.
  +     * The object needs to have a constructor which accepts no parameters.
  +     *
  +     * @param className  Class name of object to be generated
  +     * @return a new instance of the class, or null if it could not be loaded or instantiated (the error is logged)
  +     */
  +    private static Object generateObject(String className)
  +    {
  +        Object o = null;
  +        try
  +        {
  +            Class c = Class.forName(className);
  +            o = c.newInstance();
  +        }
  +        catch (ClassNotFoundException cnfe)
  +        {
  +            cat.error(cnfe.getMessage() + " No class named '" + className + "' was found.", cnfe);
  +        }
  +        catch (InstantiationException ie)
  +        {
  +            cat.error(ie.getMessage() + " Class named '" + className + "' could not be  instantiated.", ie);
  +        }
  +        catch (IllegalAccessException iae)
  +        {
  +            cat.error(iae.getMessage() + " No access to class named '" + className + "'.", iae);
  +        }
  +        return o;
  +    }
  +
  +}
  
  
  
  1.2       +125 -130  jakarta-lucene-sandbox/projects/appex/src/java/search/SearchIndexer.java
  
  Index: SearchIndexer.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene-sandbox/projects/appex/src/java/search/SearchIndexer.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- SearchIndexer.java	4 May 2002 15:43:46 -0000	1.1
  +++ SearchIndexer.java	8 May 2002 15:52:23 -0000	1.2
  @@ -1,132 +1,127 @@
  -package search;
  -
  -/* ====================================================================
  - * The Apache Software License, Version 1.1
  - *
  - * Copyright (c) 2001 The Apache Software Foundation.  All rights
  - * reserved.
  - *
  - * Redistribution and use in source and binary forms, with or without
  - * modification, are permitted provided that the following conditions
  - * are met:
  - *
  - * 1. Redistributions of source code must retain the above copyright
  - *    notice, this list of conditions and the following disclaimer.
  - *
  - * 2. Redistributions in binary form must reproduce the above copyright
  - *    notice, this list of conditions and the following disclaimer in
  - *    the documentation and/or other materials provided with the
  - *    distribution.
  - *
  - * 3. The end-user documentation included with the redistribution,
  - *    if any, must include the following acknowledgment:
  - *       "This product includes software developed by the
  - *        Apache Software Foundation (http://www.apache.org/)."
  - *    Alternately, this acknowledgment may appear in the software itself,
  - *    if and wherever such third-party acknowledgments normally appear.
  - *
  - * 4. The names "Apache" and "Apache Software Foundation" and
  - *    "Apache Turbine" must not be used to endorse or promote products
  - *    derived from this software without prior written permission. For
  - *    written permission, please contact apache@apache.org.
  - *
  - * 5. Products derived from this software may not be called "Apache",
  - *    "Apache Turbine", nor may "Apache" appear in their name, without
  - *    prior written permission of the Apache Software Foundation.
  - *
  - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  - * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  - * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  - * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  - * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  - * SUCH DAMAGE.
  - * ====================================================================
  - *
  - * This software consists of voluntary contributions made by many
  - * individuals on behalf of the Apache Software Foundation.  For more
  - * information on the Apache Software Foundation, please see
  - * <http://www.apache.org/>.
  +package search;
  +
  +/* ====================================================================
  + * The Apache Software License, Version 1.1
  + *
  + * Copyright (c) 2001 The Apache Software Foundation.  All rights
  + * reserved.
  + *
  + * Redistribution and use in source and binary forms, with or without
  + * modification, are permitted provided that the following conditions
  + * are met:
  + *
  + * 1. Redistributions of source code must retain the above copyright
  + *    notice, this list of conditions and the following disclaimer.
  + *
  + * 2. Redistributions in binary form must reproduce the above copyright
  + *    notice, this list of conditions and the following disclaimer in
  + *    the documentation and/or other materials provided with the
  + *    distribution.
  + *
  + * 3. The end-user documentation included with the redistribution,
  + *    if any, must include the following acknowledgment:
  + *       "This product includes software developed by the
  + *        Apache Software Foundation (http://www.apache.org/)."
  + *    Alternately, this acknowledgment may appear in the software itself,
  + *    if and wherever such third-party acknowledgments normally appear.
  + *
  + * 4. The names "Apache" and "Apache Software Foundation" and
  + *    "Apache Lucene" must not be used to endorse or promote products
  + *    derived from this software without prior written permission. For
  + *    written permission, please contact apache@apache.org.
  + *
  + * 5. Products derived from this software may not be called "Apache",
  + *    "Apache Lucene", nor may "Apache" appear in their name, without
  + *    prior written permission of the Apache Software Foundation.
  + *
  + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
  + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  + * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
  + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
  + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
  + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  + * SUCH DAMAGE.
  + * ====================================================================
  + *
  + * This software consists of voluntary contributions made by many
  + * individuals on behalf of the Apache Software Foundation.  For more
  + * information on the Apache Software Foundation, please see
  + * <http://www.apache.org/>.
    */
  -
  -import org.apache.lucene.analysis.standard.StandardAnalyzer;
  -import org.apache.lucene.index.IndexWriter;
  -import org.apache.log4j.Category;
  -
  -import java.io.IOException;
  -import java.util.List;
  -import java.util.Map;
  -
  -import search.contenthandler.ContentHandlerFactory;
  -
  -/**
  - * Entry point for search engine indexing.
  - * <p>
  - * SearchIndexer is responsible for creating the IndexWriter {@see org.apache.lucene.index.IndexWriter}
  - * and passing it to DocumentHandlers {@link DocumentHandler} to index individual documents.
  - * </p>
  - *
  - * @author <a href="mailto:kelvin@relevanz.com">Kelvin Tan</a>
  - */
  -public class SearchIndexer
  -{
  -    private static Category cat = Category.getInstance(SearchIndexer.class);
  -
  -    private IndexWriter writer;
  -    private DataSource source;
  -    private int indexedDocuments = 0;
  -
  -    public SearchIndexer() throws IOException
  -    {
  -        writer = new IndexWriter("/usr/local/lucene/index",
  -                                 new StandardAnalyzer(), true);
  -    }
  -
  -    public void index() throws IOException, Exception
  -    {
  -        cat.debug("Initiating indexing...");
  -
  -        init();
  -        List dataMapList = source.getData();
  -        for (int i = 0; i < dataMapList.size(); i++)
  -        {
  -            Map map = (Map) dataMapList.get(i);
  -            DocumentHandler docHandler = new DocumentHandler(writer);
  -            try
  -            {
  -                docHandler.process(map);
  -                ++indexedDocuments;
  -            }
  -            catch (IOException ioe)
  -            {
  -                cat.error("Error encountered indexing:" + ioe.getMessage(),
  -                          ioe);
  -            }
  -        }
  -        writer.optimize();
  -        writer.close();
  -
  -        cat.debug(indexedDocuments + " documents were indexed.");
  -    }
  -
  -    public void setSource(DataSource source)
  -    {
  -        this.source = source;
  -    }
  -
  -    public void init()
  -    {
  -        ContentHandlerFactory.setContentHandlers(source.getConfig().getContentHandlers());
  -        DocumentHandler.setCustomFields(source.getConfig().getCustomFields());
  -    }
  -
  -    public int getIndexedDocuments()
  -    {
  -        return this.indexedDocuments;
  -    }
  +import org.apache.log4j.Category;
  +import org.apache.lucene.analysis.Analyzer;
  +import org.apache.lucene.analysis.standard.StandardAnalyzer;
  +import org.apache.lucene.index.IndexWriter;
  +
  +import java.io.IOException;
  +import java.util.List;
  +import java.util.Map;
  +
  +import search.contenthandler.FileContentHandlerFactory;
  +
  +/**
  + * Entry point for search engine indexing.
  + * <p>
  + * SearchIndexer is responsible for creating the IndexWriter
  + * {@link org.apache.lucene.index.IndexWriter} and passing it to
  + * DocumentHandlers {@link DocumentHandler} to index individual documents.
  + * </p>
  + */
  +public class SearchIndexer
  +{
  +    private static Category cat = Category.getInstance(SearchIndexer.class);
  +    private IndexWriter fsWriter;
  +    private SearchConfiguration config;
  +    private int indexedDocuments = 0;
  +
  +    public SearchIndexer() throws IOException
  +    {
  +        Analyzer a = new StandardAnalyzer();
  +        String indexDirectory = "/usr/path/to/index";
  +        fsWriter = new IndexWriter(indexDirectory, a, true);
  +        fsWriter.maxFieldLength = 1000000;
  +    }
  +
  +    /**
  +     * Indexes documents.
  +     */
  +    public synchronized void index() throws IOException, Exception
  +    {
  +        cat.debug("Initiating search engine indexing...");
  +        long start = System.currentTimeMillis();
  +        loadConfig();
  +        fsWriter.optimize();
  +        fsWriter.close();
  +        long stop = System.currentTimeMillis();
  +        cat.debug("Indexing took " + (stop - start) + " milliseconds");
  +    }
  +
  +    public int getIndexedDocuments()
  +    {
  +        return this.indexedDocuments;
  +    }
  +
  +    private void loadConfig() throws IllegalConfigurationException
  +    {
  +        config = new SearchConfiguration("/path/to/config");
  +        FileContentHandlerFactory.setHandlerRegistry(config.getContentHandlers());
  +    }
  +
  +    private void indexDataSource(DataSource source, Map customFields)
  +            throws Exception
  +    {
  +        Map[] data = source.getData();
  +        // here's a good place to spawn a couple of threads for indexing
  +        for (int i = 0; i < data.length; i++)
  +        {
  +            DocumentHandler docHandler =
  +                    new DocumentHandler(data[i], customFields, fsWriter);
  +            docHandler.process();
  +        }
  +    }
   }
  
  
  

--
To unsubscribe, e-mail:   <ma...@jakarta.apache.org>
For additional commands, e-mail: <ma...@jakarta.apache.org>