You are viewing a plain text version of this content. The canonical link for it is here.
Posted to slide-dev@jakarta.apache.org by lu...@apache.org on 2005/01/14 19:34:14 UTC

cvs commit: jakarta-slide/src/share/org/apache/slide/extractor AbstractContentExtractor.java AbstractPropertyExtractor.java Extractor.java ExtractorManager.java MSExcelExtractor.java MSPowerPointExtractor.java MSWordExtractor.java OfficeExtractor.java PDFExtractor.java SimpleXmlExtractor.java TextContentExtractor.java XmlContentExtractor.java

luetzkendorf    2005/01/14 10:34:14

  Modified:    src/share/org/apache/slide/extractor
                        AbstractContentExtractor.java
                        AbstractPropertyExtractor.java Extractor.java
                        ExtractorManager.java MSExcelExtractor.java
                        MSPowerPointExtractor.java MSWordExtractor.java
                        OfficeExtractor.java PDFExtractor.java
                        SimpleXmlExtractor.java TextContentExtractor.java
                        XmlContentExtractor.java
  Log:
  Improvements by Eirikur S. Hrafnsson for Content-Type handling in
  the extractor framework (see Bugzilla 33065)
  
  Revision  Changes    Path
  1.5       +20 -4     jakarta-slide/src/share/org/apache/slide/extractor/AbstractContentExtractor.java
  
  Index: AbstractContentExtractor.java
  ===================================================================
  RCS file: /home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/AbstractContentExtractor.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- AbstractContentExtractor.java	29 Sep 2004 15:28:06 -0000	1.4
  +++ AbstractContentExtractor.java	14 Jan 2005 18:34:13 -0000	1.5
  @@ -47,10 +47,26 @@
   
       public abstract Reader extract(InputStream content) throws ExtractorException;
   
  +    /* (non-Javadoc)
  +     * @see org.apache.slide.extractor.Extractor#getContentType()
  +     */
       public String getContentType() {
           return contentType;
       }
  -
  +    
  +	/**
  +	 * Default implementation returns true if getContentType() contains the fileToIndexContentType<br/>
  +	 * OR if getContentType() returns null.
  +	 * @param fileToIndexContentType The content type of the file we want to index.
  +	 */
  +	public boolean isAcceptableContentType(String fileToIndexContentType) {
  +		if(getContentType()!=null){
  +			//return true if the contentType string contains fileToIndexContentType
  +			return (getContentType().indexOf(fileToIndexContentType)>=0);
  +		}
  +		return true;
  +	}
  +	
       public String getUri() {
           return uri;
       }
  
  
  
  1.4       +20 -4     jakarta-slide/src/share/org/apache/slide/extractor/AbstractPropertyExtractor.java
  
  Index: AbstractPropertyExtractor.java
  ===================================================================
  RCS file: /home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/AbstractPropertyExtractor.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- AbstractPropertyExtractor.java	29 Sep 2004 15:28:06 -0000	1.3
  +++ AbstractPropertyExtractor.java	14 Jan 2005 18:34:13 -0000	1.4
  @@ -23,8 +23,8 @@
   
   package org.apache.slide.extractor;
   
  -import java.util.Map;
   import java.io.InputStream;
  +import java.util.Map;
   
   /**
    * The AbstractPropertyExtractor class
  @@ -47,6 +47,9 @@
   
       public abstract Map extract(InputStream content) throws ExtractorException;
   
  +    /* (non-Javadoc)
  +     * @see org.apache.slide.extractor.Extractor#getContentType()
  +     */
       public String getContentType() {
           return contentType;
       }
  @@ -58,5 +61,18 @@
       public String getNamespace() {
           return namespace;
       }
  +    
  +	/**
  +	 * Default implementation returns true if getContentType() contains the fileToIndexContentType<br/>
  +	 * OR if getContentType() returns null.
  +	 * @param fileToIndexContentType The content type of the file we want to index.
  +	 */
  +	public boolean isAcceptableContentType(String fileToIndexContentType) {
  +		if(getContentType()!=null){
  +			//return true if the contentType string contains fileToIndexContentType
  +			return (getContentType().indexOf(fileToIndexContentType)>=0);
  +		}
  +		return true;
  +	}
   
   }
  
  
  
  1.6       +26 -6     jakarta-slide/src/share/org/apache/slide/extractor/Extractor.java
  
  Index: Extractor.java
  ===================================================================
  RCS file: /home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/Extractor.java,v
  retrieving revision 1.5
  retrieving revision 1.6
  diff -u -r1.5 -r1.6
  --- Extractor.java	29 Sep 2004 15:28:06 -0000	1.5
  +++ Extractor.java	14 Jan 2005 18:34:13 -0000	1.6
  @@ -24,15 +24,35 @@
   package org.apache.slide.extractor;
   
   /**
  - * The Extractor interface
  - * 
  + * The Extractor interface.<br/>
  + * Default init parameters in Domain.xml include:<br/>
  + * <li>"classname" - The extractor class.</li>
  + * <li>"uri" - The URI the extractor handles.</li>
  + * <li>"content-type" - A comma separated list of supported content types</li>
  + * <li>"namespace" - The namespace the extractor handles.</li>
    */
   public interface Extractor {
   
  -    public String getContentType();
  +	/**
  +	 * @param contentType of the file to index
  +	 * @return true if this extractor can handle indexing a file of the supplied contentType otherwise false.
  +	 */
  +    public boolean isAcceptableContentType(String contentType);
   
  +    /**
  +     * @return a comma separated list of content types this extractor is registered to handle.<br/>
  +     * May also return null if it handles any type of file.
  +     */
  +    public String getContentType();
  +    
  +    /**
  +     * @return The URI this extractor is registered to handle.
  +     */
       public String getUri();
   
  +    /**
  +     * @return The namespace this extractor is registered to handle.
  +     */
       public String getNamespace();
   
   }
  
  
  
  1.7       +15 -15    jakarta-slide/src/share/org/apache/slide/extractor/ExtractorManager.java
  
  Index: ExtractorManager.java
  ===================================================================
  RCS file: /home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/ExtractorManager.java,v
  retrieving revision 1.6
  retrieving revision 1.7
  diff -u -r1.6 -r1.7
  --- ExtractorManager.java	8 Nov 2004 09:37:43 -0000	1.6
  +++ ExtractorManager.java	14 Jan 2005 18:34:13 -0000	1.7
  @@ -23,17 +23,20 @@
   
   package org.apache.slide.extractor;
   
  +import java.lang.reflect.Constructor;
  +import java.util.ArrayList;
  +import java.util.Enumeration;
  +import java.util.Iterator;
  +import java.util.List;
  +import org.apache.slide.content.NodeRevisionDescriptor;
  +import org.apache.slide.content.NodeRevisionDescriptors;
   import org.apache.slide.util.conf.Configurable;
   import org.apache.slide.util.conf.Configuration;
   import org.apache.slide.util.conf.ConfigurationException;
  -import org.apache.slide.content.NodeRevisionDescriptors;
  -import org.apache.slide.content.NodeRevisionDescriptor;
  -
  -import java.util.*;
  -import java.lang.reflect.Constructor;
   
   /**
    * The ExtractorManager class
  + * 
    */
   public class ExtractorManager implements Configurable {
       private final static ExtractorManager manager = new ExtractorManager();
  @@ -103,8 +106,7 @@
       {
           for ( Iterator i = extractors.iterator(); i.hasNext(); ) {
               Extractor extractor = (Extractor)i.next();
  -            if ( extractor instanceof ContentExtractor && 
  -                    matches(extractor, namespace, uri, descriptor)) {
  +            if ( extractor instanceof ContentExtractor && matches(extractor, namespace, uri, descriptor)) {
                   return true;
               }
           }
  @@ -113,7 +115,7 @@
   
       static boolean matches(Extractor extractor, String namespace, NodeRevisionDescriptors descriptors, NodeRevisionDescriptor descriptor) {
           boolean matching = true;
  -        if ( descriptor != null && extractor.getContentType() != null && !descriptor.getContentType().equals(extractor.getContentType()) ) {
  +        if ( descriptor != null && !extractor.isAcceptableContentType(descriptor.getContentType())) {
               matching = false;
           }
           if ( descriptors != null && extractor.getUri() != null && !descriptors.getUri().startsWith(extractor.getUri()) ) {
  @@ -125,10 +127,8 @@
           return matching;
       }
       
  -    static boolean matches(Extractor extractor, String namespace, String uri, 
  -            NodeRevisionDescriptor descriptor) 
  -    {
  -        if ( descriptor != null && !descriptor.getContentType().equals(extractor.getContentType()) ) {
  +    static boolean matches(Extractor extractor, String namespace, String uri, NodeRevisionDescriptor descriptor) {
  +        if ( descriptor != null && !extractor.isAcceptableContentType(descriptor.getContentType()) ) {
               return false;
           }
           if ( extractor.getUri() != null && !uri.startsWith(extractor.getUri()) ) {
  
  
  
  1.3       +31 -17    jakarta-slide/src/share/org/apache/slide/extractor/MSExcelExtractor.java
  
  Index: MSExcelExtractor.java
  ===================================================================
  RCS file: /home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/MSExcelExtractor.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- MSExcelExtractor.java	29 Sep 2004 15:28:06 -0000	1.2
  +++ MSExcelExtractor.java	14 Jan 2005 18:34:13 -0000	1.3
  @@ -23,23 +23,27 @@
   
   package org.apache.slide.extractor;
   
  -/**
  - * Author: Ryan Rhodes
  - * Date: Jun 26, 2004
  - * Time: 1:53:31 AM
  - */
  -
  -import java.io.*;
  +import java.io.CharArrayReader;
  +import java.io.CharArrayWriter;
  +import java.io.FileInputStream;
  +import java.io.InputStream;
  +import java.io.Reader;
   import java.util.Iterator;
  -
  -import org.apache.poi.poifs.filesystem.POIFSFileSystem;
  -import org.apache.poi.hssf.usermodel.HSSFWorkbook;
  -import org.apache.poi.hssf.usermodel.HSSFSheet;
  -import org.apache.poi.hssf.usermodel.HSSFRow;
   import org.apache.poi.hssf.usermodel.HSSFCell;
  +import org.apache.poi.hssf.usermodel.HSSFRow;
  +import org.apache.poi.hssf.usermodel.HSSFSheet;
  +import org.apache.poi.hssf.usermodel.HSSFWorkbook;
  +import org.apache.poi.poifs.filesystem.POIFSFileSystem;
   
  -public class MSExcelExtractor  extends AbstractContentExtractor
  -{
  +/**
  + * Content extractor for Microsoft Excel documents.
  + */
  +public class MSExcelExtractor extends AbstractContentExtractor {
  +	
  +	static final String CONTENT_TYPE_EXCEL_1 = "application/msexcel";
  +	static final String CONTENT_TYPE_EXCEL_2 = "application/vnd.ms-excel";
  +	static final String CONTENT_TYPE_EXCEL_ALL_CSV = CONTENT_TYPE_EXCEL_1+","+CONTENT_TYPE_EXCEL_2;
  +	
       public MSExcelExtractor(String uri, String contentType, String namespace) {
         super(uri, contentType, namespace);
       }
  @@ -104,4 +108,14 @@
           }
           while(c != -1);
       }
  +    
  +	/* (non-Javadoc)
  +	 * @see org.apache.slide.extractor.Extractor#getContentType()
  +	 */
  +	public String getContentType() {
  +		if(super.getContentType()==null){
  +			return CONTENT_TYPE_EXCEL_ALL_CSV;
  +		}
  +		return super.getContentType();
  +	}
   }
  
  
  
  1.4       +29 -13    jakarta-slide/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java
  
  Index: MSPowerPointExtractor.java
  ===================================================================
  RCS file: /home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/MSPowerPointExtractor.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- MSPowerPointExtractor.java	29 Sep 2004 15:28:06 -0000	1.3
  +++ MSPowerPointExtractor.java	14 Jan 2005 18:34:13 -0000	1.4
  @@ -23,21 +23,27 @@
   
   package org.apache.slide.extractor;
   
  -import org.apache.poi.util.LittleEndian;
  -import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
  -import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
  +import java.io.ByteArrayInputStream;
  +import java.io.ByteArrayOutputStream;
  +import java.io.FileInputStream;
  +import java.io.InputStream;
  +import java.io.InputStreamReader;
  +import java.io.Reader;
   import org.apache.poi.poifs.eventfilesystem.POIFSReader;
  +import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
  +import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
   import org.apache.poi.poifs.filesystem.DocumentInputStream;
  -
  -import java.io.*;
  +import org.apache.poi.util.LittleEndian;
   
   /**
  - * Author: Ryan Rhodes
  - * Date: Jun 27, 2004
  - * Time: 3:45:39 AM
  + * Content extractor for Microsoft PowerPoint documents.
    */
  -public class MSPowerPointExtractor extends AbstractContentExtractor implements POIFSReaderListener
  -{
  +public class MSPowerPointExtractor extends AbstractContentExtractor implements POIFSReaderListener{
  +
  +    static final String CONTENT_TYPE_POWERPOINT_1 = "application/mspowerpoint";
  +    static final String CONTENT_TYPE_POWERPOINT_2 = "application/vnd.ms-powerpoint";
  +    static final String CONTENT_TYPE_POWERPOINT_ALL_CSV = CONTENT_TYPE_POWERPOINT_1+","+CONTENT_TYPE_POWERPOINT_2;
  +	
       private ByteArrayOutputStream writer = new ByteArrayOutputStream();
   
       public MSPowerPointExtractor(String uri, String contentType, String namespace) {
  @@ -104,4 +110,14 @@
           }
           while( c != -1 );
       }
  +    
  +	/* (non-Javadoc)
  +	 * @see org.apache.slide.extractor.Extractor#getContentType()
  +	 */
  +	public String getContentType() {
  +		if(super.getContentType()==null){
  +			return CONTENT_TYPE_POWERPOINT_ALL_CSV;
  +		}
  +		return super.getContentType();
  +	}
   }
  
  
  
  1.3       +26 -13    jakarta-slide/src/share/org/apache/slide/extractor/MSWordExtractor.java
  
  Index: MSWordExtractor.java
  ===================================================================
  RCS file: /home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/MSWordExtractor.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- MSWordExtractor.java	29 Sep 2004 15:28:06 -0000	1.2
  +++ MSWordExtractor.java	14 Jan 2005 18:34:13 -0000	1.3
  @@ -23,17 +23,20 @@
   
   package org.apache.slide.extractor;
   
  -/**
  - * Author: Ryan Rhodes
  - * Date: Jun 26, 2004
  - * Time: 12:34:29 AM
  - */
  -
  -import java.io.*;
  -
  +import java.io.FileInputStream;
  +import java.io.InputStream;
  +import java.io.Reader;
  +import java.io.StringReader;
   import org.textmining.text.extraction.WordExtractor;
   
  +/**
  + * Content extractor for Microsoft Word documents.
  + */
   public class MSWordExtractor extends AbstractContentExtractor {
  +	
  +    static final String CONTENT_TYPE_WORD_1 = "application/msword";
  +    static final String CONTENT_TYPE_WORD_2 = "application/vnd.ms-word";
  +    static final String CONTENT_TYPE_WORD_ALL_CSV = CONTENT_TYPE_WORD_1+","+CONTENT_TYPE_WORD_2;
   
       public MSWordExtractor(String uri, String contentType, String namespace) {
           super(uri, contentType, namespace);
  @@ -41,8 +44,7 @@
   
       public Reader extract(InputStream content)  throws ExtractorException {
           try {
  -            WordExtractor  extractor =
  -                    new WordExtractor();
  +            WordExtractor  extractor = new WordExtractor();
               String text = extractor.extractText(content);          
   
               StringReader reader = new StringReader(text);
  @@ -70,4 +72,15 @@
               }
               while( c != -1 );
           }
  +        
  +    	/* (non-Javadoc)
  +    	 * @see org.apache.slide.extractor.Extractor#getContentType()
  +    	 */
  +    	public String getContentType() {
  +    		if(super.getContentType()==null){
  +    			return CONTENT_TYPE_WORD_ALL_CSV;
  +    		}
  +    		return super.getContentType();
  +    	}
  +    	
   }
  
  
  
  1.4       +27 -6     jakarta-slide/src/share/org/apache/slide/extractor/OfficeExtractor.java
  
  Index: OfficeExtractor.java
  ===================================================================
  RCS file: /home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/OfficeExtractor.java,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- OfficeExtractor.java	29 Sep 2004 15:28:06 -0000	1.3
  +++ OfficeExtractor.java	14 Jan 2005 18:34:13 -0000	1.4
  @@ -1,21 +1,31 @@
   package org.apache.slide.extractor;
   
   import java.io.InputStream;
  -import java.util.*;
  -
  -import org.apache.poi.hpsf.*;
  -import org.apache.poi.poifs.eventfilesystem.*;
  +import java.util.ArrayList;
  +import java.util.Enumeration;
  +import java.util.HashMap;
  +import java.util.Iterator;
  +import java.util.List;
  +import java.util.Map;
  +import org.apache.poi.hpsf.NoPropertySetStreamException;
  +import org.apache.poi.hpsf.Property;
  +import org.apache.poi.hpsf.PropertySet;
  +import org.apache.poi.hpsf.PropertySetFactory;
  +import org.apache.poi.hpsf.Section;
  +import org.apache.poi.poifs.eventfilesystem.POIFSReader;
  +import org.apache.poi.poifs.eventfilesystem.POIFSReaderEvent;
  +import org.apache.poi.poifs.eventfilesystem.POIFSReaderListener;
   import org.apache.slide.util.conf.Configurable;
   import org.apache.slide.util.conf.Configuration;
   import org.apache.slide.util.conf.ConfigurationException;
   
   /**
  - * The OfficeExtractor class
  - * 
  + * Property extractor for Microsoft Office documents.
    */
   public class OfficeExtractor extends AbstractPropertyExtractor implements Configurable {
   	protected List instructions = new ArrayList();
   	protected Map propertyMap = new HashMap();
  +	static final String CONTENT_TYPE_MS_OFFICE_ALL_CSV = MSWordExtractor.CONTENT_TYPE_WORD_ALL_CSV+","+MSExcelExtractor.CONTENT_TYPE_EXCEL_ALL_CSV+","+MSPowerPointExtractor.CONTENT_TYPE_POWERPOINT_ALL_CSV;
   	
   	public OfficeExtractor(String uri, String contentType, String namespace) {
   		super(uri, contentType, namespace);
  @@ -81,4 +91,15 @@
   			propertyMap.put(id, property);
           }
   	}
  +	
  +	/* (non-Javadoc)
  +	 * @see org.apache.slide.extractor.Extractor#getContentType()
  +	 */
  +	public String getContentType() {
  +		if(super.getContentType()==null){
  +			return CONTENT_TYPE_MS_OFFICE_ALL_CSV;
  +		}
  +		return super.getContentType();
  +	}
  +	
   }
  
  
  
  1.3       +27 -11    jakarta-slide/src/share/org/apache/slide/extractor/PDFExtractor.java
  
  Index: PDFExtractor.java
  ===================================================================
  RCS file: /home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/PDFExtractor.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- PDFExtractor.java	29 Sep 2004 15:28:06 -0000	1.2
  +++ PDFExtractor.java	14 Jan 2005 18:34:13 -0000	1.3
  @@ -23,19 +23,24 @@
   
   package org.apache.slide.extractor;
   
  -import org.pdfbox.util.PDFTextStripper;
  +import java.io.CharArrayReader;
  +import java.io.CharArrayWriter;
  +import java.io.FileInputStream;
  +import java.io.InputStream;
  +import java.io.Reader;
   import org.pdfbox.pdfparser.PDFParser;
   import org.pdfbox.pdmodel.PDDocument;
  -
  -import java.io.*;
  +import org.pdfbox.util.PDFTextStripper;
   
   /**
  - * Author: Ryan Rhodes
  - * Date: Jun 26, 2004
  - * Time: 4:03:00 AM
  + * Content extractor for PDF documents.
    */
  -public class PDFExtractor extends AbstractContentExtractor
  -{
  +public class PDFExtractor extends AbstractContentExtractor{
  +	
  +    private static final String CONTENT_TYPE_PDF_1 = "application/pdf";
  +    private static final String CONTENT_TYPE_PDF_2 = "application/x-pdf";
  +    private static final String CONTENT_TYPE_PDF_ALL_CSV = CONTENT_TYPE_PDF_1+","+CONTENT_TYPE_PDF_2;
  +
   
       public PDFExtractor(String uri, String contentType, String namespace)
       {
  @@ -84,4 +89,15 @@
           }
           while(c != -1);
       }
  +    
  +	/* (non-Javadoc)
  +	 * @see org.apache.slide.extractor.Extractor#getContentType()
  +	 */
  +	public String getContentType() {
  +		if(super.getContentType()==null){
  +			return CONTENT_TYPE_PDF_ALL_CSV;
  +		}	
  +		return super.getContentType();
  +	}
  +	
   }
  
  
  
  1.11      +28 -8     jakarta-slide/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java
  
  Index: SimpleXmlExtractor.java
  ===================================================================
  RCS file: /home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/SimpleXmlExtractor.java,v
  retrieving revision 1.10
  retrieving revision 1.11
  diff -u -r1.10 -r1.11
  --- SimpleXmlExtractor.java	29 Nov 2004 18:44:30 -0000	1.10
  +++ SimpleXmlExtractor.java	14 Jan 2005 18:34:13 -0000	1.11
  @@ -23,6 +23,14 @@
   
   package org.apache.slide.extractor;
   
  +import java.io.IOException;
  +import java.io.InputStream;
  +import java.util.ArrayList;
  +import java.util.Enumeration;
  +import java.util.HashMap;
  +import java.util.Iterator;
  +import java.util.List;
  +import java.util.Map;
   import org.apache.slide.common.PropertyName;
   import org.apache.slide.util.conf.Configurable;
   import org.apache.slide.util.conf.Configuration;
  @@ -34,16 +42,18 @@
   import org.jdom.input.SAXBuilder;
   import org.jdom.xpath.XPath;
   
  -import java.io.IOException;
  -import java.io.InputStream;
  -import java.util.*;
  -
   /**
    * The SimpleXmlExtractor class
    * 
    */
   public class SimpleXmlExtractor extends AbstractPropertyExtractor implements Configurable {
  -
  +	
  +	static final String CONTENT_TYPE_XML = "text/xml";
  +	static final String CONTENT_TYPE_XHTML = "application/xhtml+xml";
  +	//html is included as well, because xhtml can be (and most often is) served with the html content type
  +	static final String CONTENT_TYPE_HTML = "text/html";
  +	static final String CONTENT_TYPE_XML_ALL_CSV = CONTENT_TYPE_XML+","+CONTENT_TYPE_XHTML+","+CONTENT_TYPE_HTML;
  +	
       protected List instructions = new ArrayList();
   
       public SimpleXmlExtractor(String uri, String contentType, String namespace) {
  @@ -133,4 +143,14 @@
               return propertyName;
           }
       }
  +
  +	/* (non-Javadoc)
  +	 * @see org.apache.slide.extractor.Extractor#getContentType()
  +	 */
  +	public String getContentType() {
  +		if(super.getContentType()==null){
  +			return CONTENT_TYPE_XML_ALL_CSV;
  +		}	
  +		return super.getContentType();
  +	}
   }
  
  
  
  1.3       +16 -5     jakarta-slide/src/share/org/apache/slide/extractor/TextContentExtractor.java
  
  Index: TextContentExtractor.java
  ===================================================================
  RCS file: /home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/TextContentExtractor.java,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- TextContentExtractor.java	29 Nov 2004 18:43:09 -0000	1.2
  +++ TextContentExtractor.java	14 Jan 2005 18:34:13 -0000	1.3
  @@ -30,8 +30,9 @@
   /**
    * Content extractor that simply returns the content. 
    */
  -public class TextContentExtractor extends AbstractContentExtractor
  -{
  +public class TextContentExtractor extends AbstractContentExtractor{
  +	
  +	private static final String CONTENT_TYPE_TEXT = "text/plain";
   
       public TextContentExtractor(String uri, String contentType)
       {
  @@ -46,5 +47,15 @@
       {
           return new InputStreamReader(content);
       }
  +    
  +	/* (non-Javadoc)
  +	 * @see org.apache.slide.extractor.Extractor#getContentType()
  +	 */
  +	public String getContentType() {
  +		if(super.getContentType()==null){
  +			return CONTENT_TYPE_TEXT;
  +		}	
  +		return super.getContentType();
  +	}
   
   }
  
  
  
  1.2       +13 -3     jakarta-slide/src/share/org/apache/slide/extractor/XmlContentExtractor.java
  
  Index: XmlContentExtractor.java
  ===================================================================
  RCS file: /home/cvs/jakarta-slide/src/share/org/apache/slide/extractor/XmlContentExtractor.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- XmlContentExtractor.java	29 Nov 2004 18:43:21 -0000	1.1
  +++ XmlContentExtractor.java	14 Jan 2005 18:34:13 -0000	1.2
  @@ -124,4 +124,14 @@
               return new InputSource(new StringReader(""));
           }
       }
  +
  +	/* (non-Javadoc)
  +	 * @see org.apache.slide.extractor.Extractor#getContentType()
  +	 */
  +	public String getContentType() {
  +		if(super.getContentType()==null){
  +			return SimpleXmlExtractor.CONTENT_TYPE_XML_ALL_CSV;
  +		}	
  +		return super.getContentType();
  +	}
   }
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: slide-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: slide-dev-help@jakarta.apache.org