You are viewing a plain text version of this content. The canonical link for it is here.
Posted to docs-cvs@perl.apache.org by st...@apache.org on 2002/02/07 08:26:15 UTC

cvs commit: modperl-docs/src/search SwishSpiderConfig.pl

stas        02/02/06 23:26:15

  Modified:    src/search SwishSpiderConfig.pl
  Log:
  - spider only URLs under the base URL; this still needs some
  generalization work.
  
  Revision  Changes    Path
  1.2       +5 -1      modperl-docs/src/search/SwishSpiderConfig.pl
  
  Index: SwishSpiderConfig.pl
  ===================================================================
  RCS file: /home/cvs/modperl-docs/src/search/SwishSpiderConfig.pl,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- SwishSpiderConfig.pl	4 Feb 2002 09:22:27 -0000	1.1
  +++ SwishSpiderConfig.pl	7 Feb 2002 07:26:15 -0000	1.2
  @@ -22,7 +22,11 @@
           delay_min       => .0001,
   
           # Ignore images files
  -        test_url        => sub { $_[0]->path !~ /\.(?:gif|jpe?g|.png)$/i },
  +        test_url => sub {
  +            return if $_[0]->path =~ /\.(?:gif|jpeg|.png|.gz)$/i;
  +            return unless $_[0]->path =~ m!^/preview/modperl-site!;
  +            return 1;
  +        },
   
           # Only index text/html
           test_response   => sub { return $_[2]->content_type =~ m[text/html] },
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: docs-cvs-unsubscribe@perl.apache.org
For additional commands, e-mail: docs-cvs-help@perl.apache.org