You are viewing a plain text version of this content. The canonical link for it is here.
Posted to docs-cvs@perl.apache.org by mo...@apache.org on 2002/06/30 07:45:16 UTC
cvs commit: modperl-docs/src/search SwishSpiderConfig.pl make.pl search.tt
moseley 2002/06/29 22:45:16
Modified: src/search SwishSpiderConfig.pl make.pl search.tt
Log:
Updated the indexing to assign unique section IDs to docs instead of
just trying to limit by hits on words in the path.
Revision Changes Path
1.10 +80 -26 modperl-docs/src/search/SwishSpiderConfig.pl
Index: SwishSpiderConfig.pl
===================================================================
RCS file: /home/cvs/modperl-docs/src/search/SwishSpiderConfig.pl,v
retrieving revision 1.9
retrieving revision 1.10
diff -u -r1.9 -r1.10
--- SwishSpiderConfig.pl 19 Apr 2002 19:53:33 -0000 1.9
+++ SwishSpiderConfig.pl 30 Jun 2002 05:45:16 -0000 1.10
@@ -1,11 +1,15 @@
# this is the modified default spider config file that comes with swish-e.
+# Perldoc swish.cgi for docs on the format of this file
#
-# a few custom callbacks are located after the @servers definition section.
+# a few custom callbacks are located after the @servers definition section;
+# these are used to split files into sections.
my $base_path = $ENV{MODPERL_SITE} || die "must set \$ENV{MODPERL_SITE}";
$base_path =~ s[/$][];
+# Used to fetch the available "sections"
+my $CHECKBOX_DATA = 'checkboxes.storable';
@servers = (
@@ -63,7 +67,6 @@
# Find the <head> section for use in all split pages
my $head = $tree->look_down( '_tag', 'head' );
-
# Now create a new "document" for each
create_page( $head->clone, $_->clone, \%params )
for $tree->look_down( '_tag', 'div', 'class', 'index-section' );
@@ -73,7 +76,8 @@
## so don't index it.
$tree->delete;
return 0;
-
+
+ # old code below to index pages that don't have sections defined.
# Indexed the page in sections, just return
@@ -102,34 +106,35 @@
my $uri = $params->{uri};
+ # Grab the first <a name="..."> tag that indicates this section,
+ # and use its name as the fragment of the URI
- # Grab the section link, and create a new title
+ if ( my $name = $section->look_down( '_tag', 'a', sub { defined($_[0]->attr('name')) } ) ) {
+ $uri->fragment( $name->attr('name') );
+ }
+
- my $name = $section->look_down( '_tag', 'a', sub { defined($_[0]->attr('name')) } );
-
- if ( $name ) {
+ # Now grab the first <a href="..">description</a> tag
+ if ( my $link = $section->look_down( '_tag', 'a', sub { defined($_[0]->attr('href')) } ) ) {
- my @a_content;
-
- my $section_name = $name->attr('name');
- $uri->fragment( $section_name );
+ my $description = $link->as_text;
- if ( ! (@a_content = $name->content_list) ) {
- $section_name =~ tr/_/ /;
- @a_content = ( $section_name );
- }
+ if ( $description ) {
- # Modify or create the title
+ # Modify or create the title
- my $title = $head->look_down('_tag', 'title');
+ my $title = $head->look_down('_tag', 'title');
+
+ if ( $title ) {
+ $title->push_content( ": $description" );
- if ( $title ) {
- $title->push_content( ': ', @a_content );
- } else {
- my $title = HTML::Element->new('title');
- $title->push_content( @a_content );
- $head->push_content( $title );
+ } else { # Create a new title
+
+ my $title = HTML::Element->new('title');
+ $title->push_content( $description );
+ $head->push_content( $title );
+ }
}
}
@@ -142,9 +147,11 @@
if ( $uri =~ m!$base_path/(.+)$! ) {
my $path = $1;
- $path =~ s{/?[^/]+$}{}; # remove file name, if one
- my $meta = HTML::Element->new('meta', name=> 'section', content => $path);
- $head->push_content( $meta );
+
+ if ( my $sections = map_path_to_sections( $path ) ) {
+ my $meta = HTML::Element->new('meta', name=> 'section', content => $sections);
+ $head->push_content( $meta );
+ }
}
# Add the total document length, which is different than the section length
@@ -171,8 +178,55 @@
$params->{found}++; # set flag;
+
$doc->delete;
}
+
+my %section_names;
+
+sub map_path_to_sections {
+ my $path = shift;
+
+ %section_names = fetch_sections( $CHECKBOX_DATA )
+ unless %section_names;
+
+
+ my @sections;
+ for ( keys %section_names ) {
+ my $test = quotemeta( $_ );
+ push @sections, $section_names{ $_ } if $path =~ /^$test/;
+ }
+
+ return @sections ? join(' ', @sections ) : undef;
+}
+
+
+
+
+
+use Storable;
+sub fetch_sections {
+ my $file = shift;
+
+ my $items_array = retrieve( $file );
+ die unless $items_array;
+
+ my %sections;
+ recurse_sections( \%sections, $items_array );
+ return %sections;
+
+}
+
+sub recurse_sections {
+ my ( $sections, $items_array ) = @_;
+
+ for ( @$items_array ) {
+ # grab the path and its associated section ID
+ $sections->{ $_->{path} } = $_->{section};
+ recurse_sections( $sections, $_->{subs} ) if $_->{subs};
+ }
+}
+
1;
1.3 +66 -7 modperl-docs/src/search/make.pl
Index: make.pl
===================================================================
RCS file: /home/cvs/modperl-docs/src/search/make.pl,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- make.pl 29 May 2002 07:21:46 -0000 1.2
+++ make.pl 30 Jun 2002 05:45:16 -0000 1.3
@@ -2,10 +2,58 @@
use strict;
use Storable;
-# This must match up with .swishcgi.conf setting
+=head1 NAME
+
+make.pl -- program to generate data needed for searching
+
+=head1 Description
+
+make.pl uses input contained within its own source that defines "sections" of the site based
+on path names. These names can then be used when searching with swish to limit
+searches to just these areas of the site.
+
+When indexing the site with swish-e each file is tagged with meta data that indicates
+which section or sections it belongs to.
+
+The input format is described in the source of this file.
+
+make.pl creates two output files:
+
+=over 4
+
+=item search_options
+
+A template toolkit include file for defining an array of section names and a hash that
+maps the section names to nice descriptions. This data is used to
+create the select box on the side bar during site generation (by running bin/build).
+
+=item checkboxes.storable
+
+A Perl data structure used by the F<search.cgi> script to generate the nested
+checkboxes for the advanced search feature. This allows selecting more than one
+area of the site at a time.
+
+This file is saved using the Storable perl module, and is read in by the
+search script (F<swish.cgi>) configuration parameter file F<.swishcgi.conf> and
+made available to Template-Toolkit when F<swish.cgi> is running.
+
+This file is also read when indexing with swish-e (see F<SwishSpiderConfig.pl>) and is used to
+map path names into section names.
+
+=back
+
+Running this program is described in the F<README> file contained in
+the F<src/search> directory of the mod_perl site distribution.
+
+
+=cut
+
+
+# This must match up with .swishcgi.conf setting and SwishSpiderConfig.pl
my $CHECKBOX_DATA = 'checkboxes.storable';
# This is used for all pages -- it's the array and hash for the sidebar search
+# It contains an array parsable by Template Toolkit.
my $SEARCH_OPTIONS = 'search_options';
@@ -21,7 +69,7 @@
0, download, Download, Download
0, docs, Documentation, All Docs
1, docs/1.0, mod_perl 1.0 Docs, 1.0 Docs
- 2, docs/1.0/guide, Guide,
+ 2, docs/1.0/guide, Guide
2, docs/1.0/win32, Win32
2, docs/1.0/api, API
1, docs/2.0, mod_perl 2.0 Docs, 2.0 Docs
@@ -39,11 +87,16 @@
+ # Split the above items out into a hash.
+
+ my $section_id = 'SecA';
+
my @items_flat = map {
s/^\s+//;
s/\s+$//;
+ $_ = $section_id++ . ", $_";
my %h;
- @h{qw/indent value label short/} = split m!\s*,\s*!;
+ @h{qw/section indent path label short/} = split m!\s*,\s*!;
$h{short} ||= ( $h{label} || 'missing description' );
@@ -51,16 +104,22 @@
} split /\n/, $items;
- my $array_values = join "\n", map { ' ' x (( $_->{indent}+2 ) * 4) . qq["$_->{value}"] } @items_flat;
+
+ # Build the data parsable by Template-Toolkit
+
+ my $array_values = join "\n", map { ' ' x (( $_->{indent}+2 ) * 4) . qq["$_->{section}"] } @items_flat;
+
my $hash_values = join "\n", map {
my $dots = '..' x $_->{indent};
my $spaces = ' ' x (( $_->{indent}+2 ) * 4);
- qq[$spaces"$_->{value}" => "$dots$_->{short}" ]
+ qq[$spaces"$_->{section}" => "$dots$_->{short}" ]
} @items_flat;
+
+
my $check_box_array = build_array( \@items_flat );
-#use Data::Dumper;
+#use Data::Dumper;
#print Dumper $check_box_array;
store( $check_box_array, $CHECKBOX_DATA ); # store for swish.cgi
@@ -99,7 +158,7 @@
#==============================================================================
# Subroutine that builds the data structure expected by template toolkit
-# TT uses values .value, .label, and .subs. See search.tt for example
+# TT uses values .section, .label, and .subs. See search.tt for example
#
#
#
1.16 +1 -1 modperl-docs/src/search/search.tt
Index: search.tt
===================================================================
RCS file: /home/cvs/modperl-docs/src/search/search.tt,v
retrieving revision 1.15
retrieving revision 1.16
diff -u -r1.15 -r1.16
--- search.tt 29 Apr 2002 22:30:09 -0000 1.15
+++ search.tt 30 Jun 2002 05:45:16 -0000 1.16
@@ -71,7 +71,7 @@
<ul>
[%- FOREACH sec = subs -%]
- <li class="search-list">[% CGI.checkbox('sbm', 0, sec.value, sec.label); %]
+ <li class="search-list">[% CGI.checkbox('sbm', 0, sec.section, sec.label); %]
[%- IF sec.subs -%][%- PROCESS sub_items subs=sec.subs -%][%- END -%]</li>
[%- END -%]
---------------------------------------------------------------------
To unsubscribe, e-mail: docs-cvs-unsubscribe@perl.apache.org
For additional commands, e-mail: docs-cvs-help@perl.apache.org