You are viewing a plain text version of this content. The canonical link for it is here.
Posted to slide-dev@jakarta.apache.org by un...@apache.org on 2004/09/29 17:28:06 UTC

cvs commit: jakarta-slide/src/stores/org/apache/slide/index TextContentIndexer.java

unico       2004/09/29 08:28:06

  Modified:    src/share/org/apache/slide/extractor MSWordExtractor.java
                        AbstractPropertyExtractor.java
                        ExtractorManager.java AbstractContentExtractor.java
                        OfficeExtractor.java Extractor.java
                        MSPowerPointExtractor.java PDFExtractor.java
                        SimpleXmlExtractor.java
                        PropertyExtractorTrigger.java MSExcelExtractor.java
               src/share/org/apache/slide/common Namespace.java Domain.java
               src/stores/org/apache/slide/index TextContentIndexer.java
  Log:
  Per-namespace extractor configuration:
  extractors can now be declared for each namespace separately,
  in addition to the global extractor configuration for the whole domain.
  
  Revision  Changes    Path
  1.2       +6 -6      jakarta-slide/src/share/org/apache/slide/extractor/MSWordExtractor.java
  
  Index: MSWordExtractor.java
  ===================================================================
  RCS file: /home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/MSWordExtractor.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- MSWordExtractor.java	29 Jun 2004 08:10:57 -0000	1.1
  +++ MSWordExtractor.java	29 Sep 2004 15:28:06 -0000	1.2
  @@ -35,8 +35,8 @@
   
   public class MSWordExtractor extends AbstractContentExtractor {
   
  -    public MSWordExtractor(String uri, String contentType) {
  -        super(uri, contentType);
  +    public MSWordExtractor(String uri, String contentType, String namespace) {
  +        super(uri, contentType, namespace);
       }
   
       public Reader extract(InputStream content)  throws ExtractorException {
  @@ -57,7 +57,7 @@
           {
               FileInputStream in = new FileInputStream(args[0]);
   
  -            MSWordExtractor ex = new MSWordExtractor(null, null);
  +            MSWordExtractor ex = new MSWordExtractor(null, null, null);
   
               Reader reader = ex.extract(in);
   
  
  
  
  1.3       +16 -5     jakarta-slide/src/share/org/apache/slide/extractor/AbstractPropertyExtractor.java
  
  Index: AbstractPropertyExtractor.java
  ===================================================================
  RCS file: /home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/AbstractPropertyExtractor.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- AbstractPropertyExtractor.java	28 Jul 2004 09:36:01 -0000	1.2
  +++ AbstractPropertyExtractor.java	29 Sep 2004 15:28:06 -0000	1.3
  @@ -28,15 +28,21 @@
   
   /**
    * The AbstractPropertyExtractor class
  - * 
    */
   public abstract class AbstractPropertyExtractor implements PropertyExtractor {
  +
       private String contentType;
       private String uri;
  +    private String namespace;
   
       public AbstractPropertyExtractor(String uri, String contentType) {
  +        this(uri, contentType, null);
  +    }
  +
  +    public AbstractPropertyExtractor(String uri, String contentType, String namespace) {
           this.contentType = contentType;
           this.uri = uri;
  +        this.namespace = namespace;
       }
   
       public abstract Map extract(InputStream content) throws ExtractorException;
  @@ -48,4 +54,9 @@
       public String getUri() {
           return uri;
       }
  -}
  \ No newline at end of file
  +
  +    public String getNamespace() {
  +        return namespace;
  +    }
  +
  +}
  
  
  
  1.5       +22 -13    jakarta-slide/src/share/org/apache/slide/extractor/ExtractorManager.java
  
  Index: ExtractorManager.java
  ===================================================================
  RCS file: /home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/ExtractorManager.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- ExtractorManager.java	28 Jul 2004 09:36:01 -0000	1.4
  +++ ExtractorManager.java	29 Sep 2004 15:28:06 -0000	1.5
  @@ -34,7 +34,6 @@
   
   /**
    * The ExtractorManager class
  - * 
    */
   public class ExtractorManager implements Configurable {
       private final static ExtractorManager manager = new ExtractorManager();
  @@ -51,11 +50,11 @@
           extractors.add(extractor);
       }
   
  -    public PropertyExtractor[] getPropertyExtractors(NodeRevisionDescriptors descriptors, NodeRevisionDescriptor descriptor) {
  +    public PropertyExtractor[] getPropertyExtractors(String namespace, NodeRevisionDescriptors descriptors, NodeRevisionDescriptor descriptor) {
           List matchingExtractors = new ArrayList();
           for ( Iterator i = extractors.iterator(); i.hasNext(); ) {
               Extractor extractor = (Extractor)i.next();
  -            if ( extractor instanceof PropertyExtractor && matches(extractor,  descriptors, descriptor)) {
  +            if ( extractor instanceof PropertyExtractor && matches(extractor, namespace, descriptors, descriptor)) {
                   matchingExtractors.add(extractor);
               }
           }
  @@ -63,11 +62,11 @@
           return (PropertyExtractor [])matchingExtractors.toArray(extractors);
       };
   
  -    public ContentExtractor[] getContentExtractors(NodeRevisionDescriptors descriptors, NodeRevisionDescriptor descriptor) {
  +    public ContentExtractor[] getContentExtractors(String namespace, NodeRevisionDescriptors descriptors, NodeRevisionDescriptor descriptor) {
           List matchingExtractors = new ArrayList();
           for ( Iterator i = extractors.iterator(); i.hasNext(); ) {
               Extractor extractor = (Extractor)i.next();
  -            if ( extractor instanceof ContentExtractor && matches(extractor,  descriptors, descriptor)) {
  +            if ( extractor instanceof ContentExtractor && matches(extractor, namespace, descriptors, descriptor)) {
                   matchingExtractors.add(extractor);
               }
           }
  @@ -75,11 +74,11 @@
           return (ContentExtractor [])matchingExtractors.toArray(extractors);
       };
   
  -    public Extractor[] getExtractors(NodeRevisionDescriptors descriptors, NodeRevisionDescriptor descriptor) {
  +    public Extractor[] getExtractors(String namespace, NodeRevisionDescriptors descriptors, NodeRevisionDescriptor descriptor) {
           List matchingExtractors = new ArrayList();
           for ( Iterator i = extractors.iterator(); i.hasNext(); ) {
               Extractor extractor = (Extractor)i.next();
  -            if ( matches(extractor,  descriptors, descriptor)) {
  +            if ( matches(extractor, namespace, descriptors, descriptor)) {
                   matchingExtractors.add(extractor);
               }
           }
  @@ -87,7 +86,7 @@
           return (Extractor [])matchingExtractors.toArray(extractors);
       };
   
  -    public boolean matches(Extractor extractor, NodeRevisionDescriptors descriptors, NodeRevisionDescriptor descriptor) {
  +    public boolean matches(Extractor extractor, String namespace, NodeRevisionDescriptors descriptors, NodeRevisionDescriptor descriptor) {
           boolean matching = true;
           if ( descriptor != null && extractor.getContentType() != null && !descriptor.getContentType().equals(extractor.getContentType()) ) {
               matching = false;
  @@ -95,6 +94,9 @@
           if ( descriptors != null && extractor.getUri() != null && !descriptors.getUri().startsWith(extractor.getUri()) ) {
               matching = false;
           }
  +        if ( descriptors != null && extractor.getNamespace() != null && !extractor.getNamespace().equals(namespace)) {
  +            matching = false;
  +        }
           return matching;
       }
   
  @@ -105,11 +107,18 @@
               String classname = extractorConfig.getAttribute("classname");
               String uri = extractorConfig.getAttribute("uri", null);
               String contentType = extractorConfig.getAttribute("content-type", null);
  +            String namespace = extractorConfig.getAttribute("namespace", null);
               try {
                   Class extractorClass = Class.forName(classname);
                   Extractor extractor = null;
  -                Constructor extractorConstructor = extractorClass.getConstructor(new Class[] { String.class, String.class } );
  -                extractor = (Extractor)extractorConstructor.newInstance(new String[] { uri, contentType });
  +                try {
  +                    Constructor extractorConstructor = extractorClass.getConstructor(new Class[] { String.class, String.class, String.class } );
  +                    extractor = (Extractor) extractorConstructor.newInstance(new String[] { uri, contentType, namespace });
  +                }
  +                catch (NoSuchMethodException e) {
  +                    Constructor extractorConstructor = extractorClass.getConstructor(new Class[] { String.class, String.class } );
  +                    extractor = (Extractor) extractorConstructor.newInstance(new String[] { uri, contentType });
  +                }
                   if ( extractor instanceof Configurable ) {
                       ((Configurable)extractor).configure(extractorConfig.getConfiguration("configuration"));
                   }
  
  
  
  1.4       +16 -5     jakarta-slide/src/share/org/apache/slide/extractor/AbstractContentExtractor.java
  
  Index: AbstractContentExtractor.java
  ===================================================================
  RCS file: /home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/AbstractContentExtractor.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- AbstractContentExtractor.java	28 Jul 2004 09:36:01 -0000	1.3
  +++ AbstractContentExtractor.java	29 Sep 2004 15:28:06 -0000	1.4
  @@ -28,15 +28,21 @@
   
   /**
    * The AbstractPropertyExtractor class
  - * 
    */
   public abstract class AbstractContentExtractor implements ContentExtractor {
  +
       private String contentType;
       private String uri;
  +    private String namespace;
   
       public AbstractContentExtractor(String uri, String contentType) {
  +        this(uri, contentType, null);
  +    }
  +
  +    public AbstractContentExtractor(String uri, String contentType, String namespace) {
           this.contentType = contentType;
           this.uri = uri;
  +        this.namespace = namespace;
       }
   
       public abstract Reader extract(InputStream content) throws ExtractorException;
  @@ -48,4 +54,9 @@
       public String getUri() {
           return uri;
       }
  -}
  \ No newline at end of file
  +    
  +    public String getNamespace() {
  +        return namespace;
  +    }
  +
  +}
  
  
  
  1.3       +83 -83    jakarta-slide/src/share/org/apache/slide/extractor/OfficeExtractor.java
  
  Index: OfficeExtractor.java
  ===================================================================
  RCS file: /home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/OfficeExtractor.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- OfficeExtractor.java	28 Jul 2004 09:36:01 -0000	1.2
  +++ OfficeExtractor.java	29 Sep 2004 15:28:06 -0000	1.3
  @@ -1,84 +1,84 @@
  -package org.apache.slide.extractor;
  -
  -import java.io.InputStream;
  -import java.util.*;
  -
  -import org.apache.poi.hpsf.*;
  -import org.apache.poi.poifs.eventfilesystem.*;
  -import org.apache.slide.util.conf.Configurable;
  -import org.apache.slide.util.conf.Configuration;
  -import org.apache.slide.util.conf.ConfigurationException;
  -
  -/**
  - * The OfficeExtractor class
  - * 
  - */
  -public class OfficeExtractor extends AbstractPropertyExtractor implements Configurable {
  -	protected List instructions = new ArrayList();
  -	protected Map propertyMap = new HashMap();
  -	
  -	public OfficeExtractor(String uri, String contentType) {
  -		super(uri, contentType);
  -	}
  -
  -	public Map extract(InputStream content) throws ExtractorException {
  -		OfficePropertiesListener listener = new OfficePropertiesListener();
  -		try {
  -			POIFSReader r = new POIFSReader();
  -			r.registerListener(listener);
  -			r.read(content);
  -		} catch (Exception e) {
  -			throw new ExtractorException("Exception while extracting properties in OfficeExtractor");
  -		}
  -		return listener.getProperties();
  -	}
  -
  -	class OfficePropertiesListener implements POIFSReaderListener {
  -
  -		private HashMap properties = new HashMap();
  -
  -		public Map getProperties() {
  -				return properties;
  -		}
  -
  -		public void processPOIFSReaderEvent(POIFSReaderEvent event) {
  -			PropertySet ps = null;
  -			try {
  -				ps = PropertySetFactory.create(event.getStream());
  -			} catch (NoPropertySetStreamException ex) {
  -				return;
  -			} catch (Exception ex) {
  -				throw new RuntimeException("Property set stream \"" + event.getPath() + event.getName() + "\": " + ex);
  -			}
  -			String eventName = event.getName().trim();
  -			final long sectionCount = ps.getSectionCount();
  -			List sections = ps.getSections();
  -			int nr = 0;
  -			for (Iterator i = sections.iterator(); i.hasNext();) {
  -				Section sec = (Section) i.next();
  -				int propertyCount = sec.getPropertyCount();
  -				Property[] props = sec.getProperties();
  -				for (int i2 = 0; i2 < props.length; i2++) {
  -					Property p = props[i2];
  -					int id = p.getID();
  -					long type = p.getType();
  -					Object value = p.getValue();
  -					String key = eventName + "-" + nr + "-" + id; 
  -					if ( propertyMap.containsKey(key) ) {
  -						properties.put(propertyMap.get(key), value);
  -					}
  -				}
  -			}
  -		}
  -	}
  -
  -	public void configure(Configuration configuration) throws ConfigurationException {
  -        Enumeration instructions = configuration.getConfigurations("instruction");
  -        while (instructions.hasMoreElements()) {
  -            Configuration extract = (Configuration)instructions.nextElement();
  -            String property = extract.getAttribute("property");
  -            String id = extract.getAttribute("id");
  -			propertyMap.put(id, property);
  -        }
  -	}
  +package org.apache.slide.extractor;
  +
  +import java.io.InputStream;
  +import java.util.*;
  +
  +import org.apache.poi.hpsf.*;
  +import org.apache.poi.poifs.eventfilesystem.*;
  +import org.apache.slide.util.conf.Configurable;
  +import org.apache.slide.util.conf.Configuration;
  +import org.apache.slide.util.conf.ConfigurationException;
  +
  +/**
  + * The OfficeExtractor class
  + * 
  + */
  +public class OfficeExtractor extends AbstractPropertyExtractor implements Configurable {
  +	protected List instructions = new ArrayList();
  +	protected Map propertyMap = new HashMap();
  +	
  +	public OfficeExtractor(String uri, String contentType, String namespace) {
  +		super(uri, contentType, namespace);
  +	}
  +
  +	public Map extract(InputStream content) throws ExtractorException {
  +		OfficePropertiesListener listener = new OfficePropertiesListener();
  +		try {
  +			POIFSReader r = new POIFSReader();
  +			r.registerListener(listener);
  +			r.read(content);
  +		} catch (Exception e) {
  +			throw new ExtractorException("Exception while extracting properties in OfficeExtractor");
  +		}
  +		return listener.getProperties();
  +	}
  +
  +	class OfficePropertiesListener implements POIFSReaderListener {
  +
  +		private HashMap properties = new HashMap();
  +
  +		public Map getProperties() {
  +				return properties;
  +		}
  +
  +		public void processPOIFSReaderEvent(POIFSReaderEvent event) {
  +			PropertySet ps = null;
  +			try {
  +				ps = PropertySetFactory.create(event.getStream());
  +			} catch (NoPropertySetStreamException ex) {
  +				return;
  +			} catch (Exception ex) {
  +				throw new RuntimeException("Property set stream \"" + event.getPath() + event.getName() + "\": " + ex);
  +			}
  +			String eventName = event.getName().trim();
  +			final long sectionCount = ps.getSectionCount();
  +			List sections = ps.getSections();
  +			int nr = 0;
  +			for (Iterator i = sections.iterator(); i.hasNext();) {
  +				Section sec = (Section) i.next();
  +				int propertyCount = sec.getPropertyCount();
  +				Property[] props = sec.getProperties();
  +				for (int i2 = 0; i2 < props.length; i2++) {
  +					Property p = props[i2];
  +					int id = p.getID();
  +					long type = p.getType();
  +					Object value = p.getValue();
  +					String key = eventName + "-" + nr + "-" + id; 
  +					if ( propertyMap.containsKey(key) ) {
  +						properties.put(propertyMap.get(key), value);
  +					}
  +				}
  +			}
  +		}
  +	}
  +
  +	public void configure(Configuration configuration) throws ConfigurationException {
  +        Enumeration instructions = configuration.getConfigurations("instruction");
  +        while (instructions.hasMoreElements()) {
  +            Configuration extract = (Configuration)instructions.nextElement();
  +            String property = extract.getAttribute("property");
  +            String id = extract.getAttribute("id");
  +			propertyMap.put(id, property);
  +        }
  +	}
   }
  
  
  
  1.5       +7 -3      jakarta-slide/src/share/org/apache/slide/extractor/Extractor.java
  
  Index: Extractor.java
  ===================================================================
  RCS file: /home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/Extractor.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- Extractor.java	28 Jul 2004 09:36:01 -0000	1.4
  +++ Extractor.java	29 Sep 2004 15:28:06 -0000	1.5
  @@ -28,7 +28,11 @@
    * 
    */
   public interface Extractor {
  +
       public String getContentType();
   
       public String getUri();
  +
  +    public String getNamespace();
  +
   }
  
  
  
  1.3       +6 -6      jakarta-slide/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java
  
  Index: MSPowerPointExtractor.java
  ===================================================================
  RCS file: /home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- MSPowerPointExtractor.java	14 Jul 2004 08:01:31 -0000	1.2
  +++ MSPowerPointExtractor.java	29 Sep 2004 15:28:06 -0000	1.3
  @@ -40,8 +40,8 @@
   {
       private ByteArrayOutputStream writer = new ByteArrayOutputStream();
   
  -    public MSPowerPointExtractor(String uri, String contentType) {
  -        super(uri, contentType);
  +    public MSPowerPointExtractor(String uri, String contentType, String namespace) {
  +        super(uri, contentType, namespace);
       }
   
       public Reader extract(InputStream content)  throws ExtractorException {
  @@ -91,7 +91,7 @@
       {
           FileInputStream in = new FileInputStream(args[0]);
   
  -        MSPowerPointExtractor ex = new MSPowerPointExtractor(null, null);
  +        MSPowerPointExtractor ex = new MSPowerPointExtractor(null, null, null);
   
           Reader reader = ex.extract(in);
   
  
  
  
  1.2       +6 -6      jakarta-slide/src/share/org/apache/slide/extractor/PDFExtractor.java
  
  Index: PDFExtractor.java
  ===================================================================
  RCS file: /home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/PDFExtractor.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- PDFExtractor.java	29 Jun 2004 08:10:57 -0000	1.1
  +++ PDFExtractor.java	29 Sep 2004 15:28:06 -0000	1.2
  @@ -37,9 +37,9 @@
   public class PDFExtractor extends AbstractContentExtractor
   {
   
  -    public PDFExtractor(String uri, String contentType)
  +    public PDFExtractor(String uri, String contentType, String namespace)
       {
  -        super(uri, contentType);
  +        super(uri, contentType, namespace);
       }
   
       public Reader extract(InputStream content)  throws ExtractorException
  @@ -72,7 +72,7 @@
       {
           FileInputStream in = new FileInputStream(args[0]);
   
  -        PDFExtractor ex = new PDFExtractor(null, null);
  +        PDFExtractor ex = new PDFExtractor(null, null, null);
   
           Reader reader = ex.extract(in);
   
  
  
  
  1.8       +6 -5      jakarta-slide/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java
  
  Index: SimpleXmlExtractor.java
  ===================================================================
  RCS file: /home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java,v
  retrieving revision 1.7
  retrieving revision 1.8
  diff -u -r1.7 -r1.8
  --- SimpleXmlExtractor.java	28 Jul 2004 09:36:01 -0000	1.7
  +++ SimpleXmlExtractor.java	29 Sep 2004 15:28:06 -0000	1.8
  @@ -42,10 +42,11 @@
    * 
    */
   public class SimpleXmlExtractor extends AbstractPropertyExtractor implements Configurable {
  +
       protected List instructions = new ArrayList();
   
  -    public SimpleXmlExtractor(String uri, String contentType) {
  -        super(uri, contentType);
  +    public SimpleXmlExtractor(String uri, String contentType, String namespace) {
  +        super(uri, contentType, namespace);
       }
   
       public Map extract(InputStream content) throws ExtractorException {
  
  
  
  1.6       +6 -5      jakarta-slide/src/share/org/apache/slide/extractor/PropertyExtractorTrigger.java
  
  Index: PropertyExtractorTrigger.java
  ===================================================================
  RCS file: /home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/PropertyExtractorTrigger.java,v
  retrieving revision 1.5
  retrieving revision 1.6
  diff -u -r1.5 -r1.6
  --- PropertyExtractorTrigger.java	28 Jul 2004 09:36:01 -0000	1.5
  +++ PropertyExtractorTrigger.java	29 Sep 2004 15:28:06 -0000	1.6
  @@ -45,11 +45,12 @@
   
       public void store(ContentEvent event) throws VetoException {
           try {
  +            String namespaceName = event.getNamespace() == null ? null : event.getNamespace().getName();
               NodeRevisionDescriptors descriptors = event.getRevisionDescriptors();
               NodeRevisionDescriptor descriptor = event.getRevisionDescriptor();
               NodeRevisionContent content = event.getRevisionContent();
               if ( content != null && descriptor != null ) {
  -                PropertyExtractor[] extractor = ExtractorManager.getInstance().getPropertyExtractors(descriptors, descriptor);
  +                PropertyExtractor[] extractor = ExtractorManager.getInstance().getPropertyExtractors(namespaceName, descriptors, descriptor);
                   for ( int i = 0; i < extractor.length; i++ ) {
                       Map extractedProperties = extractor[i].extract(new ByteArrayInputStream(content.getContentBytes()));
                       for ( Iterator j = extractedProperties.entrySet().iterator(); j.hasNext(); ) {
  
  
  
  1.2       +6 -6      jakarta-slide/src/share/org/apache/slide/extractor/MSExcelExtractor.java
  
  Index: MSExcelExtractor.java
  ===================================================================
  RCS file: /home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/MSExcelExtractor.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- MSExcelExtractor.java	29 Jun 2004 08:10:57 -0000	1.1
  +++ MSExcelExtractor.java	29 Sep 2004 15:28:06 -0000	1.2
  @@ -40,8 +40,8 @@
   
   public class MSExcelExtractor  extends AbstractContentExtractor
   {
  -    public MSExcelExtractor(String uri, String contentType) {
  -      super(uri, contentType);
  +    public MSExcelExtractor(String uri, String contentType, String namespace) {
  +      super(uri, contentType, namespace);
       }
   
       public Reader extract(InputStream content)  throws ExtractorException
  @@ -92,7 +92,7 @@
       {
           FileInputStream in = new FileInputStream(args[0]);
   
  -        MSExcelExtractor ex = new MSExcelExtractor(null, null);
  +        MSExcelExtractor ex = new MSExcelExtractor(null, null, null);
   
           Reader reader = ex.extract(in);
   
  
  
  
  1.68      +41 -4     jakarta-slide/src/share/org/apache/slide/common/Namespace.java
  
  Index: Namespace.java
  ===================================================================
  RCS file: /home/cvs/jakarta-slide/src/share/org/apache/slide/common/Namespace.java,v
  retrieving revision 1.67
  retrieving revision 1.68
  diff -u -r1.67 -r1.68
  --- Namespace.java	23 Sep 2004 10:46:56 -0000	1.67
  +++ Namespace.java	29 Sep 2004 15:28:06 -0000	1.68
  @@ -23,15 +23,20 @@
   
   package org.apache.slide.common;
   
  +import java.lang.reflect.Constructor;
   import java.lang.reflect.Method;
   import java.util.Enumeration;
   import java.util.Hashtable;
   import java.util.Vector;
  +
   import javax.transaction.Status;
   import javax.transaction.SystemException;
   import javax.transaction.TransactionManager;
  +
   import org.apache.slide.authenticate.CredentialsToken;
   import org.apache.slide.content.ContentInterceptor;
  +import org.apache.slide.extractor.Extractor;
  +import org.apache.slide.extractor.ExtractorManager;
   import org.apache.slide.store.ContentStore;
   import org.apache.slide.store.DefaultIndexer;
   import org.apache.slide.store.IndexStore;
  @@ -45,6 +50,7 @@
   import org.apache.slide.structure.ObjectAlreadyExistsException;
   import org.apache.slide.structure.SubjectNode;
   import org.apache.slide.transaction.SlideTransactionManager;
  +import org.apache.slide.util.conf.Configurable;
   import org.apache.slide.util.conf.Configuration;
   import org.apache.slide.util.conf.ConfigurationException;
   import org.apache.slide.util.logger.Logger;
  @@ -883,6 +889,37 @@
           
       }
       
  +    
  +    void loadExtractors(Configuration namespaceExtractorsDefinition) 
  +    	throws SlideException {
  +        
  +        getLogger().log("Loading namespace " + getName() + " extractors",LOG_CHANNEL,Logger.INFO);
  +        
  +        Enumeration extractorConfigs = namespaceExtractorsDefinition.getConfigurations("extractor");
  +        while (extractorConfigs.hasMoreElements()) {
  +            Configuration extractorConfig = (Configuration) extractorConfigs.nextElement();
  +            String classname = extractorConfig.getAttribute("classname");
  +            String uri = extractorConfig.getAttribute("uri", null);
  +            String contentType = extractorConfig.getAttribute("content-type", null);
  +            String namespace = getName();
  +            try {
  +                Class extractorClass = Class.forName(classname);
  +                Extractor extractor = null;
  +                Constructor extractorConstructor = extractorClass.getConstructor(new Class[] { String.class, String.class, String.class } );
  +                extractor = (Extractor)extractorConstructor.newInstance(new String[] { uri, contentType, namespace });
  +                if ( extractor instanceof Configurable ) {
  +                    ((Configurable)extractor).configure(extractorConfig.getConfiguration("configuration"));
  +                }
  +                ExtractorManager.getInstance().addExtractor(extractor);
  +            } catch (ClassCastException e) {
  +                throw new ConfigurationException("Extractor '"+classname+"' is not of type Extractor", namespaceExtractorsDefinition);
  +            } catch (ConfigurationException e) {
  +                throw e;
  +            } catch (Exception e) {
  +                throw new ConfigurationException("Extractor '"+classname+"' could not be loaded", namespaceExtractorsDefinition);
  +            }
  +        }
  +    }
       
       // -------------------------------------------------------- Private Methods
       
  
  
  
  1.49      +13 -8     jakarta-slide/src/share/org/apache/slide/common/Domain.java
  
  Index: Domain.java
  ===================================================================
  RCS file: /home/cvs/jakarta-slide/src/share/org/apache/slide/common/Domain.java,v
  retrieving revision 1.48
  retrieving revision 1.49
  diff -u -r1.48 -r1.49
  --- Domain.java	28 Jul 2004 09:38:20 -0000	1.48
  +++ Domain.java	29 Sep 2004 15:28:06 -0000	1.49
  @@ -848,10 +848,15 @@
               }
               
               namespace.loadConfiguration(namespaceConfigurationDefinition);
  -            
  -            
  -            
  -            
  +
  +            try {
  +                Configuration namespaceExtractorsDefinition = 
  +                    configuration.getConfiguration("extractors");
  +                namespace.loadExtractors(namespaceExtractorsDefinition);
  +            } catch (ConfigurationException e) {
  +                // ignore
  +            }
  +
               // preparation to add services, please ignore now
               try {
                   Configuration services = configuration.getConfiguration("services");
  
  
  
  1.7       +5 -5      jakarta-slide/src/stores/org/apache/slide/index/TextContentIndexer.java
  
  Index: TextContentIndexer.java
  ===================================================================
  RCS file: /home/cvs/jakarta-slide/src/stores/org/apache/slide/index/TextContentIndexer.java,v
  retrieving revision 1.6
  retrieving revision 1.7
  diff -u -r1.6 -r1.7
  --- TextContentIndexer.java	13 Sep 2004 16:52:23 -0000	1.6
  +++ TextContentIndexer.java	29 Sep 2004 15:28:06 -0000	1.7
  @@ -142,7 +142,7 @@
            doc.add(Field.Text(CONTENT_TEXT, readContent(revisionDescriptor, revisionContent)));
   
            if ( revisionContent != null && revisionDescriptor != null ) {
  -            ContentExtractor[] extractor = ExtractorManager.getInstance().getContentExtractors(null, revisionDescriptor);
  +            ContentExtractor[] extractor = ExtractorManager.getInstance().getContentExtractors(uri.getNamespace().getName(), null, revisionDescriptor);
                    for ( int i = 0; i < extractor.length; i++ ) {
                         Reader reader = extractor[i].extract(new ByteArrayInputStream(revisionContent.getContentBytes()));
                         doc.add(Field.Text(CONTENT_TEXT, reader));
  @@ -217,7 +217,7 @@
               doc.add(Field.Text(CONTENT_TEXT, readContent(revisionDescriptor, revisionContent)));
   
               if ( revisionContent != null && revisionDescriptor != null ) {
  -                 ContentExtractor[] extractor = ExtractorManager.getInstance().getContentExtractors(null, revisionDescriptor);
  +                 ContentExtractor[] extractor = ExtractorManager.getInstance().getContentExtractors(uri.getNamespace().getName(), null, revisionDescriptor);
                    for ( int i = 0; i < extractor.length; i++ ) {
                         Reader reader = extractor[i].extract(new ByteArrayInputStream(revisionContent.getContentBytes()));
                         doc.add(Field.Text(CONTENT_TEXT, reader));
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: slide-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: slide-dev-help@jakarta.apache.org