You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2019/08/13 05:01:09 UTC

svn commit: r1865007 - in /spamassassin/trunk: UPGRADE lib/Mail/SpamAssassin/Logger.pm lib/Mail/SpamAssassin/Logger/File.pm lib/Mail/SpamAssassin/Logger/Stderr.pm lib/Mail/SpamAssassin/Logger/Syslog.pm lib/Mail/SpamAssassin/PerMsgStatus.pm

Author: hege
Date: Tue Aug 13 05:01:09 2019
New Revision: 1865007

URL: http://svn.apache.org/viewvc?rev=1865007&view=rev
Log:
More Bug 6583 tweaks

Modified:
    spamassassin/trunk/UPGRADE
    spamassassin/trunk/lib/Mail/SpamAssassin/Logger.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Logger/File.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Logger/Stderr.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Logger/Syslog.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm

Modified: spamassassin/trunk/UPGRADE
URL: http://svn.apache.org/viewvc/spamassassin/trunk/UPGRADE?rev=1865007&r1=1865006&r2=1865007&view=diff
==============================================================================
--- spamassassin/trunk/UPGRADE (original)
+++ spamassassin/trunk/UPGRADE Tue Aug 13 05:01:09 2019
@@ -2,11 +2,12 @@
 Note for Users Upgrading to SpamAssassin 4.0.0
 ----------------------------------------------
 
-- All log output (stderr, file, syslog) is now binary safe,
-  non-ascii characters are escaped as \x{XX} or \x{XXXX}, backslashes escaped
-  as \\. Whitespace is not normalized anymore like in pre-4.0 versions.
+- All log output (stderr, file, syslog) is now escaped: CR, LF, TAB and
+  backslash as \r \n \t \\; other control chars, DEL and UTF-8 bytes as
+  \x{XX}. Whitespace is no longer normalized as in versions <4.0.
 
 - API: Logger::add() has new optional 'escape' parameter.
+  New Logger::escape_str() function.
 
 - API: New $pms->add_uri_detail_list() function.  Also new uri_detail_list
   types: unlinked, schemeless

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Logger.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Logger.pm?rev=1865007&r1=1865006&r2=1865007&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Logger.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Logger.pm Tue Aug 13 05:01:09 2019
@@ -77,6 +77,30 @@ use Mail::SpamAssassin::Logger::Stderr;
 $LOG_SA{method}->{stderr} =
   Mail::SpamAssassin::Logger::Stderr->new(escape => 1);
 
+# Mail::SpamAssassin::Util can't be used here (circular dependency), so the escaping helper lives in this module.
+my %escape_map =
+  ("\r" => '\\r', "\n" => '\\n', "\t" => '\\t', "\\" => '\\\\');
+sub escape_str { # escapes the caller's string in place through the $_[0] alias
+  # Input is expected to be octets already (_log utf8::encode's the message), so no utf8::encode here
+  # Pass 1: CR, LF, TAB, backslash via %escape_map; other control chars and DEL as \x{XX}
+  if ($_[0] =~ tr/\x00-\x1F\x7F\\//) { # cheap tr/// count first; s/// only runs when something matches
+    $_[0] =~ s@
+      ( [\x00-\x1F\x7F\\] )
+      @ $escape_map{$1} || sprintf("\\x{%02X}",ord($1))
+      @egsx;
+  }
+  # Pass 2: escape every byte of a (loosely matched) UTF-8 multi-byte sequence
+  # as \x{XX}, so log output shown on terminals doesn't depend on charset
+  if ($_[0] =~ tr/\xC0-\xF7//) { # cheap tr/// count of possible lead bytes; s/// only runs when one is present
+    $_[0] =~ s@
+      ( [\xC0-\xDF][\x80-\xBF] |    # Loose UTF-8
+        [\xE0-\xEF][\x80-\xBF]{2} | # ...
+        [\xF0-\xF7][\x80-\xBF]{3} ) # ...
+      @ join('', map {sprintf("\\x{%02X}",ord($_))} split(//, $1))
+      @egsx;
+  }
+}
+
 =head1 METHODS
 
 =over 4
@@ -275,6 +299,9 @@ sub _log {
   }
 
   my ($level, $message, @args) = @_;
+
+  utf8::encode($message)  if utf8::is_utf8($message); # handle as octets
+
   $message =~ s/^(?:[a-z0-9_-]*):\s*//i;
 
   $message = sprintf($message,@args)  if @args;

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Logger/File.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Logger/File.pm?rev=1865007&r1=1865006&r2=1865007&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Logger/File.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Logger/File.pm Tue Aug 13 05:01:09 2019
@@ -37,13 +37,17 @@ use re 'taint';
 use POSIX ();
 use Time::HiRes ();
 use Mail::SpamAssassin::Logger;
-use Mail::SpamAssassin::Util qw(am_running_on_windows);
 
 our @ISA = ();
 
 # ADDING OS-DEPENDENT LINE TERMINATOR - BUG 6456
+
+# Using Mail::SpamAssassin::Util::am_running_on_windows() leads to circular
+# dependencies. So, we are duplicating the code instead.
+use constant RUNNING_ON_WINDOWS => ($^O =~ /^(?:mswin|dos|os2)/oi);
+
 my $eol = "\n";
-if (am_running_on_windows()) {
+if (RUNNING_ON_WINDOWS) {
   $eol = "\r\n";
 }
 
@@ -103,12 +107,8 @@ sub log_message {
   $timestamp .= ' '  if $timestamp ne '';
 
   if ($self->{escape}) {
-    local $1;
-    # Bug 6583:
-    # Quote non-ascii characters as \x{XX} or \x{XXXX} (Unicode)
-    # Also quote backslash, so the log can be unescaped properly
-    $msg =~ s{([^\x20-\x5b\x5d-\x7e])}{ $1 eq '\\' ? '\\\\' :
-      sprintf(ord($1) > 255 ? '\\x{%04X}' : '\\x{%02X}', ord($1)) }egs;
+    # Bug 6583, escape
+    Mail::SpamAssassin::Logger::escape_str($msg);
   } elsif (!exists $self->{escape}) {
     # Backwards compatible pre-4.0 escaping, if $escape not given.
     # replace control characters with "_", tabs and spaces get

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Logger/Stderr.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Logger/Stderr.pm?rev=1865007&r1=1865006&r2=1865007&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Logger/Stderr.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Logger/Stderr.pm Tue Aug 13 05:01:09 2019
@@ -36,6 +36,7 @@ use re 'taint';
 
 use POSIX ();
 use Time::HiRes ();
+use Mail::SpamAssassin::Logger;
 
 our @ISA = ();
 
@@ -85,12 +86,8 @@ sub log_message {
   $timestamp .= ' '  if $timestamp ne '';
 
   if ($self->{escape}) {
-    local $1;
-    # Bug 6583:
-    # Quote non-ascii characters as \x{XX} or \x{XXXX} (Unicode)
-    # Also quote backslash, so the log can be unescaped properly
-    $msg =~ s{([^\x20-\x5b\x5d-\x7e])}{ $1 eq '\\' ? '\\\\' :
-      sprintf(ord($1) > 255 ? '\\x{%04X}' : '\\x{%02X}', ord($1)) }egs;
+    # Bug 6583, escape
+    Mail::SpamAssassin::Logger::escape_str($msg);
   } elsif (!exists $self->{escape}) {
     # Backwards compatible pre-4.0 escaping, if $escape not given.
     # replace control characters with "_", tabs and spaces get

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Logger/Syslog.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Logger/Syslog.pm?rev=1865007&r1=1865006&r2=1865007&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Logger/Syslog.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Logger/Syslog.pm Tue Aug 13 05:01:09 2019
@@ -150,12 +150,8 @@ sub log_message {
   }
 
   if ($self->{escape}) {
-    local $1;
-    # Bug 6583:
-    # Quote non-ascii characters as \x{XX} or \x{XXXX} (Unicode)
-    # Also quote backslash, so the log can be unescaped properly
-    $msg =~ s{([^\x20-\x5b\x5d-\x7e])}{ $1 eq '\\' ? '\\\\' :
-      sprintf(ord($1) > 255 ? '\\x{%04X}' : '\\x{%02X}', ord($1)) }egs;
+    # Bug 6583, escape
+    Mail::SpamAssassin::Logger::escape_str($msg);
   } elsif (!exists $self->{escape}) {
     # Backwards compatible pre-4.0 escaping, if $escape not given
     # replace control characters with "_", tabs and spaces get

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm?rev=1865007&r1=1865006&r2=1865007&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/PerMsgStatus.pm Tue Aug 13 05:01:09 2019
@@ -944,10 +944,6 @@ sub get_content_preview {
   $str =~ s/[-_*.]{10,}//gs;
   $str =~ s/\s+/ /gs;
 
-  # escape non-ascii stuff like Logger does
-  $str =~ s{([^\n\x20-\x5b\x5d-\x7e])}{ $1 eq '\\' ? '\\\\' :
-    sprintf(ord($1) > 255 ? '\\x{%04X}' : '\\x{%02X}', ord($1)) }egs;
-
   # add "Content preview:" ourselves, so that the text aligns
   # correctly with the template -- then trim it off.  We don't
   # have to get this *exactly* right, but it's nicer if we