You are viewing a plain text version of this content. The canonical link for it is here.
Posted to docs-cvs@perl.apache.org by st...@apache.org on 2002/06/15 20:20:33 UTC
cvs commit: modperl-docs/lib/DocSet/Source HTML.pm
stas 2002/06/15 11:20:33
Modified: lib/DocSet RunTime.pm
lib/DocSet/Doc HTML2HTML.pm POD2HTML.pm
lib/DocSet/Source HTML.pm
Log:
sync with DocSet
- improve the parsing of the E<lt>headE<gt> and make base, meta and
link elements available to the templates. [Per Einar Ellefsen
<pe...@skynet.be>]
- correct the mapping of config.cfg to autogenerated index.html, also
use path2uri to convert from a path to uri. also fix the stripping
of the full base path on the non unix system, by using abs2rel from
File::Spec.
Revision Changes Path
1.8 +13 -23 modperl-docs/lib/DocSet/RunTime.pm
Index: RunTime.pm
===================================================================
RCS file: /home/cvs/modperl-docs/lib/DocSet/RunTime.pm,v
retrieving revision 1.7
retrieving revision 1.8
diff -u -r1.7 -r1.8
--- RunTime.pm 11 Jun 2002 15:02:10 -0000 1.7
+++ RunTime.pm 15 Jun 2002 18:20:33 -0000 1.8
@@ -6,7 +6,7 @@
use strict;
use warnings;
-use File::Spec::Functions qw(catdir catfile splitdir);
+use File::Spec::Functions qw(catdir catfile splitdir abs2rel);
use File::Find;
use DocSet::Util;
@@ -84,7 +84,8 @@
@search_paths = @{$ra_search_paths || []};
- %exts = map {$_ => 1} @{$ra_search_exts || []};
+ # .cfg is for matching config.cfg to become index.html
+ %exts = map {$_ => 1} @{$ra_search_exts || []}, 'cfg';
my @ext_accept_pattern = map {quotemeta($_)."\$"} keys %exts;
my $rsub_keep_ext =
@@ -92,34 +93,23 @@
my %seen;
for my $rel_path (@search_paths) {
- my $full_path = catdir $base, $rel_path;
- die "$full_path is not a dir" unless -d $full_path;
+ my $full_base_path = catdir $base, $rel_path;
+ die "$full_base_path is not a dir" unless -d $full_base_path;
my @seen_pattern = map {"^".quotemeta($_)} keys %seen;
- my $rsub_skip_seen =
- build_matchmany_sub(\@seen_pattern);
+ my $rsub_skip_seen = build_matchmany_sub(\@seen_pattern);
- # rewrite non / paths to be / as in URI ($rel_path is no more
- # needed to read from the real fs, will need this fixup for
- # generating proper URIs.
- $rel_path = join "/", splitdir $rel_path;
-
- my $full_path_regex = quotemeta $full_path;
- $src_docs{$rel_path} = {
- "index.html" => 1, # base index.html
- map { m{(.*?/?)[^/]+$} # add autogenerated index.html
- ? ("$1index.html" => 1, $_ => 1)
- : ($_ => 1); # shouldn't happen, but just in case
- }
- map {join "/", splitdir $_} # rewrite non / paths to be URI's /
- map {s|$full_path_regex/||; $_} # strip the leading path
+ my $rel_uri = path2uri($rel_path);
+ $src_docs{$rel_uri} = {
+ map { s/config\.cfg$/index.html/; ($_ => 1) } # autogenerated index.html
+ map path2uri( abs2rel($_, $full_base_path) ), # full path=>relative uri
grep $rsub_keep_ext->($_), # get files with wanted exts
grep !$rsub_skip_seen->($_), # skip seen base dirs
- @{ expand_dir($full_path) }
+ @{ expand_dir($full_base_path) }
};
- note "Scanning for src files: $full_path";
- $seen{$full_path}++;
+ note "Scanning for src files: $full_base_path";
+ $seen{$full_base_path}++;
}
# dumper \%src_docs;
1.3 +1 -0 modperl-docs/lib/DocSet/Doc/HTML2HTML.pm
Index: HTML2HTML.pm
===================================================================
RCS file: /home/cvs/modperl-docs/lib/DocSet/Doc/HTML2HTML.pm,v
retrieving revision 1.2
retrieving revision 1.3
diff -u -r1.2 -r1.3
--- HTML2HTML.pm 5 Feb 2002 10:27:19 -0000 1.2
+++ HTML2HTML.pm 15 Jun 2002 18:20:33 -0000 1.3
@@ -20,6 +20,7 @@
my $vars = {
meta => $self->{meta},
body => \@body,
+ headers => $self->{parsed_tree}{head},
dir => $self->{dir},
nav => $self->{nav},
last_modified => $self->{timestamp},
1.7 +1 -0 modperl-docs/lib/DocSet/Doc/POD2HTML.pm
Index: POD2HTML.pm
===================================================================
RCS file: /home/cvs/modperl-docs/lib/DocSet/Doc/POD2HTML.pm,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -r1.6 -r1.7
--- POD2HTML.pm 13 Jun 2002 09:20:16 -0000 1.6
+++ POD2HTML.pm 15 Jun 2002 18:20:33 -0000 1.7
@@ -48,6 +48,7 @@
meta => $self->{meta},
toc => $self->{toc},
body => \@body,
+ headers => {},
dir => $self->{dir},
nav => $self->{nav},
last_modified => $self->{timestamp},
1.7 +62 -20 modperl-docs/lib/DocSet/Source/HTML.pm
Index: HTML.pm
===================================================================
RCS file: /home/cvs/modperl-docs/lib/DocSet/Source/HTML.pm,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -r1.6 -r1.7
--- HTML.pm 14 Apr 2002 06:17:48 -0000 1.6
+++ HTML.pm 15 Jun 2002 18:20:33 -0000 1.7
@@ -53,10 +53,6 @@
}
}
-# currently retrieves these parts from the source HTML
-# head.title
-# head.meta.description
-# body
sub parse {
my($self) = @_;
@@ -110,11 +106,12 @@
accum_h($self, $text);
}
- my $p = HTML::Parser->new(api_version => 3,
- start_h => [\&start_h, "self, tagname, attr, text"],
- end_h => [\&end_h, "self, tagname"],
- text_h => [\&text_h, "self, text"],
- );
+ my $p = HTML::Parser->new(
+ api_version => 3,
+ start_h => [\&start_h, "self, tagname, attr, text"],
+ end_h => [\&end_h, "self, tagname"],
+ text_h => [\&text_h, "self, text"],
+ );
# Parse document text chunk by chunk
$p->parse(${ $self->{content} });
$p->eof;
@@ -124,26 +121,70 @@
}
{
- # this one retrieves and stashes away the description (As 'abstract')
- # and the body and the title of the given html
+ # this parsing extracts the following elements and makes them
+ # available to templates as:
+ # meta.title
+ # head.meta.* (+ renames: description -> abstract)
+ # head.base
+ # head.link
+ # body
+
+ # init
my $start_h = sub {
- my($self, $tagname, $attr) = @_;
- if ($tagname eq 'meta' && lc $attr->{name} eq 'description') {
- $self->{parsed_tree}->{abstract} = $attr->{content};
+ my($self, $tagname, $attr, $text) = @_;
+ my $meta = $self->{parsed_tree}{head}{meta};
+
+ # special treatment
+ if ($tagname eq 'meta' && exists $attr->{name} &&
+ lc $attr->{name} eq 'description') {
+ $self->{parsed_tree}{abstract} = $attr->{content};
+ }
+ elsif ($tagname eq 'meta' && exists $attr->{content}) {
+ # note: doesn't take into account the 'scheme' attr,
+ # but that one isn't used much
+ if (exists $attr->{name}) {
+ $meta->{name}{ $attr->{name} } = $attr->{content};
+ }
+ elsif (exists $attr->{'http-equiv'}) {
+ $meta->{'http-equiv'}{ $attr->{'http-equiv'} }
+ = $attr->{content};
+ }
+ else {
+ # unsupported head element?
+ }
+ }
+ elsif ($tagname eq 'base') {
+ # there is usually only one <base>
+ $self->{parsed_tree}{head}{base} = $attr->{href}
+ if exists $attr->{href};
}
+ elsif ($tagname eq 'link') {
+ # link elements won't overlap, because each is
+ # additive -> easier to store text
+ $self->{parsed_tree}{head}{link} .= $text if length $text;
+ }
+ # note: if adding other elements that also appear outside <head>,
+ # you will need to check that you are inside <head> by setting
+ # a flag when entering it and unsetting it when exiting
};
my $end_h = sub {
my($self, $tagname, $skipped_text) = @_;
# use $p itself as a tmp storage (ok according to the docs)
+ # <title> and <body> get special treatment
+ if ($tagname eq 'title' or $tagname eq 'body') {
$self->{parsed_tree}->{$tagname} = $skipped_text;
+ }
};
- my $p = HTML::Parser->new(api_version => 3,
- report_tags => [qw(title meta body)],
- start_h => [$start_h, "self, tagname, attr"],
- end_h => [$end_h, "self, tagname, skipped_text"],
- );
+ my $p = HTML::Parser->new(
+ api_version => 3,
+ report_tags => [qw(title meta body base link)],
+ start_h => [$start_h, "self, tagname, attr, text"],
+ end_h => [$end_h, "self, tagname, skipped_text"],
+ );
+ # init
+ $p->{parsed_tree}{head}{meta} = {};
# Parse document text chunk by chunk
$p->parse(${ $self->{content} });
$p->eof;
@@ -180,7 +221,8 @@
Retrieve and set the meta data that describes the input document into
the I<meta> object attribute. The I<title> and I<link> meta attributes
-are getting set.
+are getting set. the rest of the E<lt>headE<gt> is made available for
+the templates too.
=back
---------------------------------------------------------------------
To unsubscribe, e-mail: docs-cvs-unsubscribe@perl.apache.org
For additional commands, e-mail: docs-cvs-help@perl.apache.org