You are viewing a plain text version of this content. The canonical link for it is here.
Posted to docs-cvs@perl.apache.org by mo...@apache.org on 2002/06/30 07:45:16 UTC

cvs commit: modperl-docs/src/search SwishSpiderConfig.pl make.pl search.tt

moseley     2002/06/29 22:45:16

  Modified:    src/search SwishSpiderConfig.pl make.pl search.tt
  Log:
  Updated the indexing to assign unique section IDs to docs instead of
  just trying to limit by hits on words in the path.
  
  Revision  Changes    Path
  1.10      +80 -26    modperl-docs/src/search/SwishSpiderConfig.pl
  
  Index: SwishSpiderConfig.pl
  ===================================================================
  RCS file: /home/cvs/modperl-docs/src/search/SwishSpiderConfig.pl,v
  retrieving revision 1.9
  retrieving revision 1.10
  diff -u -r1.9 -r1.10
  --- SwishSpiderConfig.pl	19 Apr 2002 19:53:33 -0000	1.9
  +++ SwishSpiderConfig.pl	30 Jun 2002 05:45:16 -0000	1.10
  @@ -1,11 +1,15 @@
   # this is the modified default spider config file that comes with swish-e.
  +# Perldoc swish.cgi for docs on the format of this file
   #
  -# a few custom callbacks are located after the @servers definition section.
  +# a few custom callbacks are located after the @servers definition section
  +# these are used to split files into sections.
   
   my $base_path = $ENV{MODPERL_SITE} || die "must set \$ENV{MODPERL_SITE}";
   
   $base_path =~ s[/$][];
   
  +# Used to fetch the available "sections" 
  +my $CHECKBOX_DATA = 'checkboxes.storable';
   
   
   @servers = (
  @@ -63,7 +67,6 @@
       # Find the <head> section for use in all split pages
       my $head = $tree->look_down( '_tag', 'head' );
   
  -
       # Now create a new "document" for each
       create_page( $head->clone, $_->clone, \%params )
           for $tree->look_down( '_tag', 'div', 'class', 'index-section' );
  @@ -73,7 +76,8 @@
       ## so don't index it.
       $tree->delete;
       return 0;
  -   
  +
  +    # old code below to index pages that don't have sections defined.
   
   
       # Indexed the page in sections, just return
  @@ -102,34 +106,35 @@
   
       my $uri = $params->{uri};
   
  +    # Grab the first <a name="..."> tag that indicates this section.
  +    # and adjust the path
   
  -    # Grab the section link, and create a new title
  +    if ( my $name = $section->look_down( '_tag', 'a', sub { defined($_[0]->attr('name')) } ) ) {
  +        $uri->fragment( $name->attr('name') );
  +    }
  +        
   
  -    my $name = $section->look_down( '_tag', 'a', sub { defined($_[0]->attr('name')) } );
   
  -    
  -    if ( $name ) {
  +    # Now grab the first <a href="..">description</a> tag
  +    if ( my $link = $section->look_down( '_tag', 'a', sub { defined($_[0]->attr('href')) } ) ) {
   
  -        my @a_content;
  -        
  -        my $section_name = $name->attr('name');
  -        $uri->fragment( $section_name );
  +        my $description = $link->as_text;
   
  -        if ( ! (@a_content = $name->content_list) ) {
  -            $section_name =~ tr/_/ /;
  -            @a_content = ( $section_name );
  -        }
  +        if ( $description ) {
   
  -        # Modify or create the title
  +            # Modify or create the title
       
  -        my $title = $head->look_down('_tag', 'title');
  +            my $title = $head->look_down('_tag', 'title');
  +
  +            if ( $title ) {
  +                $title->push_content( ": $description" );
   
  -        if ( $title ) {
  -            $title->push_content( ': ', @a_content );
  -        } else {
  -            my $title = HTML::Element->new('title');
  -            $title->push_content(  @a_content );
  -            $head->push_content( $title );
  +            } else { # Create a new title
  +            
  +                my $title = HTML::Element->new('title');
  +                $title->push_content( $description );
  +                $head->push_content( $title );
  +            }
           }
       }
   
  @@ -142,9 +147,11 @@
   
       if ( $uri =~ m!$base_path/(.+)$! ) {
           my $path = $1;
  -        $path =~ s{/?[^/]+$}{};  # remove file name, if one
  -        my $meta = HTML::Element->new('meta', name=> 'section', content => $path);
  -        $head->push_content( $meta );
  +
  +        if ( my $sections = map_path_to_sections( $path ) ) {
  +            my $meta = HTML::Element->new('meta', name=> 'section', content => $sections);
  +            $head->push_content( $meta );
  +        }
       }
   
       # Add the total document length, which is different than the section length
  @@ -171,8 +178,55 @@
   
       $params->{found}++;  # set flag;
   
  +
       $doc->delete;
   }
  +
  +my %section_names;
  +
  +sub map_path_to_sections {
  +    my $path = shift;
  +
  +    %section_names = fetch_sections( $CHECKBOX_DATA )
  +        unless %section_names;
  +
  +
  +    my @sections;
  +    for ( keys %section_names ) {
  +        my $test = quotemeta( $_ );
  +        push @sections, $section_names{ $_ } if $path =~ /^$test/;
  +    }
  +
  +    return @sections ? join(' ', @sections ) : undef;
  +}
  +
  +        
  +
  +
  +
  +use Storable;
  +sub fetch_sections {
  +    my $file = shift;
  +
  +    my $items_array = retrieve( $file );
  +    die unless $items_array;
  +
  +    my %sections;
  +    recurse_sections( \%sections, $items_array );
  +    return %sections;
  +
  +}
  +
  +sub recurse_sections {
  +    my ( $sections, $items_array ) = @_;
  +
  +    for ( @$items_array ) {
  +        # grab the path and its associated section ID
  +        $sections->{ $_->{path} } = $_->{section};
  +        recurse_sections( $sections, $_->{subs} ) if $_->{subs};
  +    }
  +}
  +
   
   
   1;
  
  
  
  1.3       +66 -7     modperl-docs/src/search/make.pl
  
  Index: make.pl
  ===================================================================
  RCS file: /home/cvs/modperl-docs/src/search/make.pl,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -r1.2 -r1.3
  --- make.pl	29 May 2002 07:21:46 -0000	1.2
  +++ make.pl	30 Jun 2002 05:45:16 -0000	1.3
  @@ -2,10 +2,58 @@
   use strict;
   use Storable;
   
  -# This must match up with .swishcgi.conf setting
  +=head1 NAME  
  +
  +make.pl -- program to generate data needed for searching
  +
  +=head1 Description
  +
  +make.pl uses input contained within this file that defines "sections" of the
  +site based on path names.  These names can then be used when searching with
  +swish to limit searches to just these areas of the site.
  +
  +When indexing the site with swish-e, each file is tagged with meta data that indicates
  +which section or sections it belongs to.
  +
  +The input format is described in the source of this file.
  +
  +make.pl creates two output files:
  +
  +=over 4
  +
  +=item search_options
  +
  +A template toolkit include file for defining an array of section names and a hash that
  +maps the section names to nice descriptions.  This data is used to
  +create the select box on the side bar during site generation (by running bin/build).
  +
  +=item checkboxes.storable
  +
  +A Perl data structure for use in the F<search.cgi> script to generate the nested
  +checkboxes for the advanced search feature.  This allows selecting more than one
  +area of the site at a time.
  +
  +This file is saved using the Storable perl module, and is read in by the
  +search script (F<swish.cgi>) configuration parameter file F<.swishcgi.conf> and
  +made available to Template-Toolkit when F<swish.cgi> is running.
  +
  +This file is also read when indexing with swish-e (see F<SwishSpiderConfig.pl>) and is used to
  +map path names into section names.
  +
  +=back
  +
  +Running this program is described in the F<README> file contained in
  +the F<src/search> directory of the mod_perl site distribution.
  +
  +
  +=cut
  +
  +
  +# This must match up with .swishcgi.conf setting and SwishSpiderConfig.pl
   my $CHECKBOX_DATA = 'checkboxes.storable';
   
   # This is used for all pages -- it's the array and has for the sidebar search
  +# It contains an array parsable by Template Toolkit.
   my $SEARCH_OPTIONS = 'search_options';
   
   
  @@ -21,7 +69,7 @@
       0, download,           Download,                   Download
       0, docs,               Documentation,              All Docs
       1,   docs/1.0,         mod_perl 1.0 Docs,          1.0 Docs
  -    2,     docs/1.0/guide, Guide,
  +    2,     docs/1.0/guide, Guide
       2,     docs/1.0/win32, Win32
       2,     docs/1.0/api,   API
       1,   docs/2.0,         mod_perl 2.0 Docs,          2.0 Docs
  @@ -39,11 +87,16 @@
   
   
   
  +    # Split the above items out into a hash.
  +
  +    my $section_id = 'SecA';
  +
       my @items_flat = map {
                s/^\s+//;
                s/\s+$//;
  +             $_ = $section_id++ . ", $_";
                my %h;
  -             @h{qw/indent value label short/} = split m!\s*,\s*!;
  +             @h{qw/section indent path label short/} = split m!\s*,\s*!;
   
                $h{short} ||= ( $h{label} || 'missing description' );
   
  @@ -51,16 +104,22 @@
           } split /\n/, $items;
   
   
  -    my $array_values = join "\n", map { ' ' x (( $_->{indent}+2 ) * 4) . qq["$_->{value}"] }  @items_flat;
  +
  +    # Build the data parsable by Template-Toolkit
  +    
  +    my $array_values = join "\n", map { ' ' x (( $_->{indent}+2 ) * 4) . qq["$_->{section}"] }  @items_flat;
  +
       my $hash_values  = join "\n", map {
           my $dots = '..' x  $_->{indent};
           my $spaces = ' ' x (( $_->{indent}+2 ) * 4);
  -        qq[$spaces"$_->{value}" => "$dots$_->{short}" ]
  +        qq[$spaces"$_->{section}" => "$dots$_->{short}" ]
       } @items_flat;
           
  +
  +
       my $check_box_array = build_array( \@items_flat );
   
  -#use Data::Dumper;                
  +#use Data::Dumper;
   #print Dumper $check_box_array;
   
       store( $check_box_array, $CHECKBOX_DATA );  # store for swish.cgi
  @@ -99,7 +158,7 @@
   
   #==============================================================================
   # Subroutine that builds the data structure expected by template toolkit
  -# TT uses values .value, .label, and .subs.  See search.tt for example
  +# TT uses values .section, .label, and .subs.  See search.tt for example
   #
   #
   #
  
  
  
  1.16      +1 -1      modperl-docs/src/search/search.tt
  
  Index: search.tt
  ===================================================================
  RCS file: /home/cvs/modperl-docs/src/search/search.tt,v
  retrieving revision 1.15
  retrieving revision 1.16
  diff -u -r1.15 -r1.16
  --- search.tt	29 Apr 2002 22:30:09 -0000	1.15
  +++ search.tt	30 Jun 2002 05:45:16 -0000	1.16
  @@ -71,7 +71,7 @@
       <ul>
       [%- FOREACH sec = subs -%]
   
  -    <li class="search-list">[% CGI.checkbox('sbm', 0, sec.value, sec.label); %]
  +    <li class="search-list">[% CGI.checkbox('sbm', 0, sec.section, sec.label); %]
           [%- IF sec.subs -%][%- PROCESS sub_items subs=sec.subs -%][%- END -%]</li>
   
       [%- END -%]
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: docs-cvs-unsubscribe@perl.apache.org
For additional commands, e-mail: docs-cvs-help@perl.apache.org