You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@labs.apache.org by co...@apache.org on 2010/02/07 01:54:45 UTC
svn commit: r907354 [1/2] - in /labs/pulse: collect/ collection/
collection/aplists.pl test/ test/2010-02-05-test.xml
Author: coar
Date: Sun Feb 7 00:54:45 2010
New Revision: 907354
URL: http://svn.apache.org/viewvc?rev=907354&view=rev
Log:
Start breaking down into more finely grained pieces, and
change the <info> elements to use CDATA.
Added:
labs/pulse/collection/
- copied from r501073, labs/pulse/collect/
labs/pulse/collection/aplists.pl (with props)
labs/pulse/test/
labs/pulse/test/2010-02-05-test.xml
Removed:
labs/pulse/collect/
Added: labs/pulse/collection/aplists.pl
URL: http://svn.apache.org/viewvc/labs/pulse/collection/aplists.pl?rev=907354&view=auto
==============================================================================
--- labs/pulse/collection/aplists.pl (added)
+++ labs/pulse/collection/aplists.pl Sun Feb 7 00:54:45 2010
@@ -0,0 +1,317 @@
+#! /usr/bin/perl -w
+#
+# aplists.pl [options] [list-parent-directory]
+#
+# -h help/usage
+# -c count messages in last 24 hours
+# -o include public lists (subscription not moderated)
+# -O don't include public lists
+# -p include private lists (subscription moderated)
+# -P don't include private lists
+# -a include both public and private lists
+# -x Generate output in XML format
+#
+# Output format is one line per list:
+# <site>:<listname>[=<info-file>][,<listname>[=info-file]...
+#
+# or XML:
+# <mailing-lists>
+# <asof>yyyy-mm-dd</asof>
+# <site>
+# <name>sitename</name>
+# <list>
+# <name>listname</name>
+# <status>unknown|public|private</status>
+# <subscribers>int</subscribers>
+# <digest-subscribers>int</digest-subscribers>
+# <info>encoded-info</info>
+# <messages>int</messages> <!-- optional -->
+# </list>
+# <list>...
+# </site>
+# :
+# </mailing-lists>
+#
+use strict;
+use Getopt::Long;
+use Symbol;
+use POSIX;
+#use XML::LibXML::Common qw(:encoding);
+
+my %options;
+my $include_public = 1;
+my $include_private = 0;
+my $use_xml = 0;
+my $count = 0;
+my $debug = 0;
+my $phandle = gensym();
+
+Getopt::Long::Configure('bundling');
+GetOptions(\%options,
+ 'a' => sub { $include_public = $include_private = 1; },
+ 'c' => \$count,
+ 'd+' => \$debug,
+ 'h' => \&usage,
+ 'o' => \$include_public,
+ 'O' => sub { $include_public = 0; },
+ 'p' => \$include_private,
+ 'P' => sub { $include_private = 0; },
+ 'x' => \$use_xml,
+ );
+
+#
+# No counting if we're not generating XML..
+#
+$count = 0 if ($count && (! $use_xml));
+
+debug('public =', $include_public);
+debug('private =', $include_private);
+
+my $tlh = gensym();
+my $slh = gensym();
+
+my $TLD = $ARGV[0] || '/home/coar/apache-apmail/lists';
+opendir($tlh, $TLD) or die("Can't opendir($TLD): $!");
+my @tldirs = readdir($tlh);
+closedir($tlh);
+
+#
+# @tldirs now has a list of all the sites. Scan each one for the
+# lists on that site.
+#
+my %lists;
+for my $tldir (@tldirs) {
+ next if ($tldir !~ /\w\.\w+$/i);
+ next if (! -d "$TLD/$tldir");
+ opendir($slh, "$TLD/$tldir") or die("Can't opendir($TLD/$tldir): $!");
+ my @sldirs = readdir($slh);
+ closedir($slh);
+ for my $sldir (@sldirs) {
+ next if ($sldir =~ /^(?:\.+|cvs)$/i);
+ next if (! -d "$TLD/$tldir/$sldir");
+ push(@{$lists{$tldir}}, $sldir);
+ }
+}
+if ($use_xml) {
+ print "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n"
+ . "<mailing-lists>\n";
+ print ' <asof>' . strftime('%Y-%m-%d', localtime()) . "</asof>\n";
+}
+my $ifile = gensym();
+for my $site (sort(keys(%lists))) {
+ if ($use_xml) {
+ print " <site>\n"
+ . " <name>$site</name>\n";
+ }
+ else {
+ print "$site:";
+ }
+ my $continue = 0;
+ for my $list (sort(@{$lists{$site}})) {
+ my $is_archived;
+ my $cmd;
+ #
+ # Assume the list is public in case we can't figure it out.
+ #
+ my $public = 1;
+ my $is_digested = 1;
+ my $config = "$TLD/$site/$list/config";
+ if (open(CONFIG, "< $config")) {
+ while (<CONFIG>) {
+ if (/^F:/) {
+ debug(2, "found config line for $list\@$site: $_");
+ $public = ($_ =~ /S/);
+ $is_archived = ($_ =~ /a/);
+ $is_digested = ($_ =~ /d/);
+ last;
+ }
+ }
+ close(CONFIG);
+ }
+ debug(3, "$list\@$site is " . ($public ? '' : 'not ') . 'public');
+ debug(3, "$list\@$site is " . ($is_archived ? '' : 'not ') . 'archived');
+ next if (($public && (! $include_public))
+ || ((! $public) && (! $include_private)));
+ if ($continue) {
+ if (! $use_xml) {
+ print ',';
+ }
+ }
+ if ($use_xml) {
+ print " <list>\n"
+ . " <name>$list</name>\n"
+ . ' <status>'
+ . ($public ? 'public' : 'private')
+ . "</status>\n";
+ }
+ else {
+ print $list;
+ }
+ my $info = "$TLD/$site/$list/text/info";
+ if (-r $info) {
+ if ($use_xml) {
+ open($ifile, "< $info");
+ my @slurp = <$ifile>;
+ close($ifile);
+ my $slurp = join('', @slurp);
+ if ($slurp !~ /^No information has been provided /) {
+ $slurp = utf8_safe($slurp);
+# $slurp = encodeToUTF8('ISO-8859-1', $slurp);
+ chomp($slurp);
+ print " <info>\n"
+ . "<![CDATA[$slurp]]>\n"
+ . " </info>\n";
+ }
+ }
+ else {
+ print "=$info";
+ }
+ }
+ $cmd = "ezmlm-list -n $TLD/$site/$list |";
+ open($phandle, $cmd)
+ and do {
+ my $subs = <$phandle>;
+ close($phandle);
+ $subs =~ /(\d+)/;
+ $subs = $1;
+ print " <subscribers>$subs</subscribers>\n";
+ };
+ if ($is_digested) {
+ $cmd = "ezmlm-list -n $TLD/$site/$list/digest |";
+ open($phandle, $cmd)
+ and do {
+ my $subs = <$phandle>;
+ close($phandle);
+ $subs =~ /(\d+)/;
+ $subs = $1;
+ print ' <digest-subscribers>'
+ . $subs
+ . "</digest-subscribers>\n";
+ };
+ }
+ if ($count) {
+ my $msgs = -1;
+ if ($is_archived) {
+ $cmd = "find $TLD/$site/$list/archive "
+ . "-mtime -1 -type f -name '[0-9]*' "
+ . '| wc -l |';
+ debug(2, $cmd);
+ open($phandle, $cmd)
+ and do {
+ $msgs = <$phandle>;
+ close($phandle);
+ chomp($msgs);
+ debug(3, "msgs = '$msgs'");
+ $msgs =~ /(\d+)/;
+ $msgs = $1;
+ };
+ }
+ print " <messages>$msgs</messages>\n";
+ }
+ my $ph = gensym();
+ my $mods = '';
+ $cmd = "ezmlm-list $TLD/$site/$list/mod |";
+ debug(2, $cmd);
+ open($ph, $cmd)
+ and do {
+ my @mods = <$ph>;
+ close($ph);
+ chomp(@mods);
+ $mods = join(' ', @mods);
+ debug(3, "mods = '$mods'");
+ };
+ if ($mods) {
+ $mods = utf8_safe($mods);
+# $mods = encodeToUTF8('ISO-8859-1', $mods);
+ print " <moderators><![CDATA[$mods]]></moderators>\n";
+ }
+ if ($use_xml) {
+ print " </list>\n";
+ }
+ }
+ if ($use_xml) {
+ print " </site>\n";
+ }
+ else {
+ print "\n";
+ }
+}
+if ($use_xml) {
+ print "</mailing-lists>\n";
+}
+#
+# Help display
+#
+sub usage {
+ print STDERR "Usage: $0 [-ahpP] [list-parent]\n"
+ . " -a Include both public and private lists\n"
+ . " -h This message\n"
+ . " -o Include open (public) lists (default)\n"
+ . " -O Do not include open lists\n"
+ . " -p Include closed (private) lists\n"
+ . " -P Do not include closed lists (default)\n";
+ exit(0);
+}
+
+sub debug {
+ my $level = 1;
+ if ($_[0] =~ /^\d+$/) {
+ $level = shift;
+ }
+ if ($debug >= $level) {
+ print 'debug: ' . join(' ', @_) . "\n";
+ }
+}
+
+sub utf8_safe {
+ my ($input) = @_;
+ my %table;
+ my $ichar;
+ for (my $i = 0; $i <= 0xFF; $i++) {
+ $ichar = chr($i);
+ if (($i == 0x09)
+ || ($i == 0x0A)
+ || ($i == 0x0D)
+ || (($i >= 0x20)
+ && ($i <= 0x7F))) {
+ $ichar = chr($i);
+# if ($ichar eq '<') {
+# $table{$ichar} = '<';
+# }
+# elsif ($ichar eq '>') {
+# $table{$ichar} = '>';
+# }
+# elsif ($ichar eq '&') {
+# $table{$ichar} = '&';
+# }
+ next;
+ }
+ if (($i < 0x7F)
+ || (($i >= 0x7F) && ($i <= 0x84))
+ || (($i >= 0x86) && ($i <= 0x9F))) {
+ $table{$ichar} = '*';
+ }
+ else {
+ $table{$ichar} = sprintf('&#x%02x;', $i);
+ }
+ }
+ my $output = '';
+ for (my $i = 0; $i < length($input); $i++) {
+ my $ichar = substr($input, $i, 1);
+ if (defined($table{$ichar})) {
+ $output .= $table{$ichar};
+ }
+ else {
+ $output .= $ichar;
+ }
+ }
+ return $output;
+}
+
+#
+# Local Variables:
+# mode: cperl
+# tab-width: 4
+# indent-tabs-mode: nil
+# End:
+#
Propchange: labs/pulse/collection/aplists.pl
------------------------------------------------------------------------------
svn:executable = *
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@labs.apache.org
For additional commands, e-mail: commits-help@labs.apache.org