You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2019/08/13 05:01:09 UTC
svn commit: r1865007 - in /spamassassin/trunk: UPGRADE
lib/Mail/SpamAssassin/Logger.pm lib/Mail/SpamAssassin/Logger/File.pm
lib/Mail/SpamAssassin/Logger/Stderr.pm lib/Mail/SpamAssassin/Logger/Syslog.pm
lib/Mail/SpamAssassin/PerMsgStatus.pm
Author: hege
Date: Tue Aug 13 05:01:09 2019
New Revision: 1865007
URL: http://svn.apache.org/viewvc?rev=1865007&view=rev
Log:
More Bug 6583 tweaks
Modified:
spamassassin/trunk/UPGRADE
spamassassin/trunk/lib/Mail/SpamAssassin/Logger.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Logger/File.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Logger/Stderr.pm
spamassassin/trunk/lib/Mail/SpamAssassin/Logger/Syslog.pm
spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
Modified: spamassassin/trunk/UPGRADE
URL: http://svn.apache.org/viewvc/spamassassin/trunk/UPGRADE?rev=1865007&r1=1865006&r2=1865007&view=diff
==============================================================================
--- spamassassin/trunk/UPGRADE (original)
+++ spamassassin/trunk/UPGRADE Tue Aug 13 05:01:09 2019
@@ -2,11 +2,12 @@
Note for Users Upgrading to SpamAssassin 4.0.0
----------------------------------------------
-- All log output (stderr, file, syslog) is now binary safe,
- non-ascii characters are escaped as \x{XX} or \x{XXXX}, backslashes escaped
- as \\. Whitespace is not normalized anymore like in pre-4.0 versions.
+- All log output (stderr, file, syslog) is now escaped properly,
+ \r \n \t \\, and control chars, DEL, UTF-8 sequences as \x{XX}.
+ Whitespace is not normalized anymore like in versions <4.0.
- API: Logger::add() has new optional 'escape' parameter.
+ New Logger::escape_str() function.
- API: New $pms->add_uri_detail_list() function. Also new uri_detail_list
types: unlinked, schemeless
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Logger.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Logger.pm?rev=1865007&r1=1865006&r2=1865007&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Logger.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Logger.pm Tue Aug 13 05:01:09 2019
@@ -77,6 +77,30 @@ use Mail::SpamAssassin::Logger::Stderr;
$LOG_SA{method}->{stderr} =
Mail::SpamAssassin::Logger::Stderr->new(escape => 1);
+# Use of M:SA:Util causes circular dependencies, separate helper here.
+my %escape_map = # short forms for CR, LF, TAB and backslash; other matched bytes fall back to \x{XX} below
+ ("\r" => '\\r', "\n" => '\\n', "\t" => '\\t', "\\" => '\\\\');
+sub escape_str { # escapes $_[0] in place (it aliases the caller's variable); no explicit return value
+ # Things are already forced as octets by _log, no utf8::encode needed
+ # Control chars, DEL, backslash: \r \n \t \\ come from %escape_map, the rest become \x{XX}
+ if ($_[0] =~ tr/\x00-\x1F\x7F\\//) { # triage helps a lot: tr only counts here, so clean strings skip the s///
+ $_[0] =~ s@
+ ( [\x00-\x1F\x7F\\] )
+ @ $escape_map{$1} || sprintf("\\x{%02X}",ord($1))
+ @egsx;
+ } # this pass runs first, so the backslashes added by the UTF-8 pass below are not escaped again
+ # Also escape UTF-8 sequences for logs, so stuff outputting on terminals doesn't depend on charset.
+ # Each byte of a matched sequence becomes its own \x{XX}; high bytes outside such a sequence are left as-is.
+ if ($_[0] =~ tr/\xC0-\xF7//) { # triage helps a lot: only run the s/// when a possible lead byte is present
+ $_[0] =~ s@
+ ( [\xC0-\xDF][\x80-\xBF] | # Loose UTF-8
+ [\xE0-\xEF][\x80-\xBF]{2} | # ...
+ [\xF0-\xF7][\x80-\xBF]{3} ) # ...
+ @ join('', map {sprintf("\\x{%02X}",ord($_))} split(//, $1))
+ @egsx;
+ }
+}
+
=head1 METHODS
=over 4
@@ -275,6 +299,9 @@ sub _log {
}
my ($level, $message, @args) = @_;
+
+ utf8::encode($message) if utf8::is_utf8($message); # handle as octets
+
$message =~ s/^(?:[a-z0-9_-]*):\s*//i;
$message = sprintf($message,@args) if @args;
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Logger/File.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Logger/File.pm?rev=1865007&r1=1865006&r2=1865007&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Logger/File.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Logger/File.pm Tue Aug 13 05:01:09 2019
@@ -37,13 +37,17 @@ use re 'taint';
use POSIX ();
use Time::HiRes ();
use Mail::SpamAssassin::Logger;
-use Mail::SpamAssassin::Util qw(am_running_on_windows);
our @ISA = ();
# ADDING OS-DEPENDENT LINE TERMINATOR - BUG 6456
+
+# Using Mail::SpamAssassin::Util::am_running_on_windows() leads to circular
+# dependencies. So, we are duplicating the code instead.
+use constant RUNNING_ON_WINDOWS => ($^O =~ /^(?:mswin|dos|os2)/oi);
+
my $eol = "\n";
-if (am_running_on_windows()) {
+if (RUNNING_ON_WINDOWS) {
$eol = "\r\n";
}
@@ -103,12 +107,8 @@ sub log_message {
$timestamp .= ' ' if $timestamp ne '';
if ($self->{escape}) {
- local $1;
- # Bug 6583:
- # Quote non-ascii characters as \x{XX} or \x{XXXX} (Unicode)
- # Also quote backslash, so the log can be unescaped properly
- $msg =~ s{([^\x20-\x5b\x5d-\x7e])}{ $1 eq '\\' ? '\\\\' :
- sprintf(ord($1) > 255 ? '\\x{%04X}' : '\\x{%02X}', ord($1)) }egs;
+ # Bug 6583, escape
+ Mail::SpamAssassin::Logger::escape_str($msg);
} elsif (!exists $self->{escape}) {
# Backwards compatible pre-4.0 escaping, if $escape not given.
# replace control characters with "_", tabs and spaces get
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Logger/Stderr.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Logger/Stderr.pm?rev=1865007&r1=1865006&r2=1865007&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Logger/Stderr.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Logger/Stderr.pm Tue Aug 13 05:01:09 2019
@@ -36,6 +36,7 @@ use re 'taint';
use POSIX ();
use Time::HiRes ();
+use Mail::SpamAssassin::Logger;
our @ISA = ();
@@ -85,12 +86,8 @@ sub log_message {
$timestamp .= ' ' if $timestamp ne '';
if ($self->{escape}) {
- local $1;
- # Bug 6583:
- # Quote non-ascii characters as \x{XX} or \x{XXXX} (Unicode)
- # Also quote backslash, so the log can be unescaped properly
- $msg =~ s{([^\x20-\x5b\x5d-\x7e])}{ $1 eq '\\' ? '\\\\' :
- sprintf(ord($1) > 255 ? '\\x{%04X}' : '\\x{%02X}', ord($1)) }egs;
+ # Bug 6583, escape
+ Mail::SpamAssassin::Logger::escape_str($msg);
} elsif (!exists $self->{escape}) {
# Backwards compatible pre-4.0 escaping, if $escape not given.
# replace control characters with "_", tabs and spaces get
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Logger/Syslog.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Logger/Syslog.pm?rev=1865007&r1=1865006&r2=1865007&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Logger/Syslog.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Logger/Syslog.pm Tue Aug 13 05:01:09 2019
@@ -150,12 +150,8 @@ sub log_message {
}
if ($self->{escape}) {
- local $1;
- # Bug 6583:
- # Quote non-ascii characters as \x{XX} or \x{XXXX} (Unicode)
- # Also quote backslash, so the log can be unescaped properly
- $msg =~ s{([^\x20-\x5b\x5d-\x7e])}{ $1 eq '\\' ? '\\\\' :
- sprintf(ord($1) > 255 ? '\\x{%04X}' : '\\x{%02X}', ord($1)) }egs;
+ # Bug 6583, escape
+ Mail::SpamAssassin::Logger::escape_str($msg);
} elsif (!exists $self->{escape}) {
# Backwards compatible pre-4.0 escaping, if $escape not given
# replace control characters with "_", tabs and spaces get
Modified: spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm?rev=1865007&r1=1865006&r2=1865007&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm Tue Aug 13 05:01:09 2019
@@ -944,10 +944,6 @@ sub get_content_preview {
$str =~ s/[-_*.]{10,}//gs;
$str =~ s/\s+/ /gs;
- # escape non-ascii stuff like Logger does
- $str =~ s{([^\n\x20-\x5b\x5d-\x7e])}{ $1 eq '\\' ? '\\\\' :
- sprintf(ord($1) > 255 ? '\\x{%04X}' : '\\x{%02X}', ord($1)) }egs;
-
# add "Content preview:" ourselves, so that the text aligns
# correctly with the template -- then trim it off. We don't
# have to get this *exactly* right, but it's nicer if we