You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@cocoon.apache.org by vg...@apache.org on 2002/02/27 06:28:38 UTC

cvs commit: xml-cocoon2/src/java/org/apache/cocoon/components/search SimpleLuceneXMLIndexerImpl.java

vgritsenko    02/02/26 21:28:38

  Modified:    src/java/org/apache/cocoon/components/crawler
                        SimpleCocoonCrawlerImpl.java
               src/java/org/apache/cocoon/components/search
                        SimpleLuceneXMLIndexerImpl.java
  Log:
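  Fix issue with content types that carry an encoding parameter (e.g. "text/xml; charset=UTF-8"): strip everything from the ';' onward before comparing the content type.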
  
  Revision  Changes    Path
  1.8       +9 -5      xml-cocoon2/src/java/org/apache/cocoon/components/crawler/SimpleCocoonCrawlerImpl.java
  
  Index: SimpleCocoonCrawlerImpl.java
  ===================================================================
  RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/crawler/SimpleCocoonCrawlerImpl.java,v
  retrieving revision 1.7
  retrieving revision 1.8
  diff -u -r1.7 -r1.8
  --- SimpleCocoonCrawlerImpl.java	22 Feb 2002 07:00:06 -0000	1.7
  +++ SimpleCocoonCrawlerImpl.java	27 Feb 2002 05:28:38 -0000	1.8
  @@ -82,7 +82,7 @@
    * A simple cocoon crawler.
    *
    * @author     <a href="mailto:berni_huber@a1.net">Bernhard Huber</a>
  - * @version CVS $Id: SimpleCocoonCrawlerImpl.java,v 1.7 2002/02/22 07:00:06 cziegeler Exp $
  + * @version CVS $Id: SimpleCocoonCrawlerImpl.java,v 1.8 2002/02/27 05:28:38 vgritsenko Exp $
    */
   public class SimpleCocoonCrawlerImpl extends AbstractLoggable
       implements CocoonCrawler, Configurable, Disposable, Recyclable
  @@ -479,12 +479,16 @@
               InputStream is = links_url_connection.getInputStream();
               BufferedReader br = new BufferedReader(new InputStreamReader(is));
   
  -            String content_type = links_url_connection.getContentType();
  +            String contentType = links_url_connection.getContentType();
  +            int index = contentType.indexOf(';');
  +            if (contentType != null && index != -1) {
  +                contentType = contentType.substring(0, index);
  +            }
               if (getLogger().isDebugEnabled()) {
  -                getLogger().debug("Content-type: " + content_type);
  +                getLogger().debug("Content-type: " + contentType);
               }
   
  -            if (content_type.equals(linkContentType)) {
  +            if (contentType.equals(linkContentType)) {
                   url_links = new ArrayList();
   
                   // content is supposed to be a list of links,
  @@ -598,7 +602,7 @@
        * </p>
        *
        * @author     <a href="mailto:berni_huber@a1.net>Bernhard Huber</a>
  -     * @version    $Id: SimpleCocoonCrawlerImpl.java,v 1.7 2002/02/22 07:00:06 cziegeler Exp $
  +     * @version    $Id: SimpleCocoonCrawlerImpl.java,v 1.8 2002/02/27 05:28:38 vgritsenko Exp $
        */
       public static class CocoonCrawlerIterator implements Iterator
       {
  
  
  
  1.8       +7 -1      xml-cocoon2/src/java/org/apache/cocoon/components/search/SimpleLuceneXMLIndexerImpl.java
  
  Index: SimpleLuceneXMLIndexerImpl.java
  ===================================================================
  RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/search/SimpleLuceneXMLIndexerImpl.java,v
  retrieving revision 1.7
  retrieving revision 1.8
  diff -u -r1.7 -r1.8
  --- SimpleLuceneXMLIndexerImpl.java	22 Feb 2002 07:00:12 -0000	1.7
  +++ SimpleLuceneXMLIndexerImpl.java	27 Feb 2002 05:28:38 -0000	1.8
  @@ -97,7 +97,7 @@
    * A simple class building lucene documents from xml content.
    *
    * @author     <a href="mailto:berni_huber@a1.net">Bernhard Huber</a>
  - * @version CVS $Id: SimpleLuceneXMLIndexerImpl.java,v 1.7 2002/02/22 07:00:12 cziegeler Exp $
  + * @version CVS $Id: SimpleLuceneXMLIndexerImpl.java,v 1.8 2002/02/27 05:28:38 vgritsenko Exp $
    */
   public class SimpleLuceneXMLIndexerImpl extends AbstractLoggable
            implements LuceneXMLIndexer, Configurable, Composable, ThreadSafe
  @@ -175,6 +175,12 @@
                   + CONTENT_QUERY);
               URLConnection contentURLConnection = contentURL.openConnection();
               String contentType = contentURLConnection.getContentType();
  +
  +            int index = contentType.indexOf(';');
  +            if (contentType != null && index != -1) {
  +                contentType = contentType.substring(0, index);
  +            }
  +
               if (contentType != null &&
                       allowedContentType.contains(contentType)) {
   
  
  
  

----------------------------------------------------------------------
In case of troubles, e-mail:     webmaster@xml.apache.org
To unsubscribe, e-mail:          cocoon-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: cocoon-cvs-help@xml.apache.org