You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spamassassin.apache.org by he...@apache.org on 2022/05/01 09:25:11 UTC

svn commit: r1900446 - in /spamassassin/trunk/lib/Mail/SpamAssassin: Message/Node.pm Plugin/DecodeShortURLs.pm Plugin/OLEVBMacro.pm

Author: hege
Date: Sun May  1 09:25:11 2022
New Revision: 1900446

URL: http://svn.apache.org/viewvc?rev=1900446&view=rev
Log:
Fix perlcritic

Modified:
    spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DecodeShortURLs.pm
    spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/OLEVBMacro.pm

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm?rev=1900446&r1=1900445&r2=1900446&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm Sun May  1 09:25:11 2022
@@ -389,7 +389,7 @@ sub detect_utf16 {
 	# avoid scan if BOM present
 	if( $data =~ /^(?:\xff\xfe|\xfe\xff)/ ) {
 		dbg( "message: detect_utf16: found BOM" );
-		return undef;	# let perl figure it out from the BOM
+		return;	# let perl figure it out from the BOM
 	}
 	
 	my @msg_h = unpack 'H' x length( $data ), $data;

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DecodeShortURLs.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DecodeShortURLs.pm?rev=1900446&r1=1900445&r2=1900446&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DecodeShortURLs.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/DecodeShortURLs.pm Sun May  1 09:25:11 2022
@@ -84,8 +84,6 @@ could not have been developed without hi
 
 package Mail::SpamAssassin::Plugin::DecodeShortURLs;
 
-my $VERSION = 0.12;
-
 use Mail::SpamAssassin::Plugin;
 use strict;
 use warnings;
@@ -93,6 +91,8 @@ use warnings;
 use vars qw(@ISA);
 @ISA = qw(Mail::SpamAssassin::Plugin);
 
+my $VERSION = 0.12;
+
 use constant HAS_LWP_USERAGENT => eval { require LWP::UserAgent; };
 use constant HAS_DBI => eval { require DBI; };
 
@@ -319,7 +319,7 @@ sub initialise_url_shortener_cache {
     return _connect_dbi_cache($self, $opts);
   } else {
     warn "Wrong cache type selected";
-    return undef;
+    return;
   }
 }
 
@@ -438,7 +438,7 @@ sub check_dnsbl {
   # Make sure we have some work to do
   # Before we open any log files etc.
   my $count = scalar keys %short_urls;
-  return undef unless $count gt 0;
+  return unless $count gt 0;
 
   $self->initialise_url_shortener_cache($opts) if defined $self->{url_shortener_cache_type};
 
@@ -459,7 +459,7 @@ sub recursive_lookup {
     dbg("Error: more than 10 shortener redirections");
     # Fire test
     $self->{short_url_maxchain} = 1;
-    return undef;
+    return;
   }
 
   my $location;
@@ -481,11 +481,11 @@ sub recursive_lookup {
       dbg("URL is not redirect: $short_url = ".$response->status_line);
       $pms->{short_url_200} = 1 if($response->code == '200');
       $pms->{short_url_404} = 1 if($response->code == '404');
-      return undef;
+      return;
     }
     $location = $response->headers->{location};
     # Bail out if $short_url redirects to itself
-    return undef if ($short_url eq $location);
+    return if ($short_url eq $location);
     if ($self->{caching}) {
       if ($self->cache_add($short_url, $location)) {
         dbg("Added $short_url to cache");
@@ -572,7 +572,7 @@ sub cache_add {
 
 sub cache_get {
   my ($self, $key) = @_;
-  return undef if not $self->{caching};
+  return if not $self->{caching};
 
   eval {
     $self->{sth_select} = $self->{dbh}->prepare_cached("
@@ -582,7 +582,7 @@ sub cache_get {
   };
   if ($@) {
    dbg("warn: $@");
-   return undef;
+   return;
   }
 
   eval {
@@ -594,7 +594,7 @@ sub cache_get {
   };
   if ($@) {
    dbg("warn: $@");
-   return undef;
+   return;
   }
 
   my $tcheck = time() - $self->{url_shortener_cache_ttl};
@@ -610,7 +610,7 @@ sub cache_get {
 
   $self->{sth_select}->finish();
   $self->{sth_update}->finish();
-  return undef;
+  return;
 }
 
 # Version features

Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/OLEVBMacro.pm
URL: http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/OLEVBMacro.pm?rev=1900446&r1=1900445&r2=1900446&view=diff
==============================================================================
--- spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/OLEVBMacro.pm (original)
+++ spamassassin/trunk/lib/Mail/SpamAssassin/Plugin/OLEVBMacro.pm Sun May  1 09:25:11 2022
@@ -819,7 +819,7 @@ sub _get_part_details {
 sub _open_zip_handle {
   my ($data) = @_;
 
-  return undef unless HAS_ARCHIVE_ZIP && HAS_IO_STRING;
+  return unless HAS_ARCHIVE_ZIP && HAS_IO_STRING;
 
   # open our archive from raw data
   my $SH = IO::String->new($data);
@@ -829,7 +829,7 @@ sub _open_zip_handle {
     dbg("cannot read zipfile");
     # as we cannot read it its not a zip (or too big/corrupted)
     # so skip processing.
-    return undef;
+    return;
   }
 
   return $zip;



Re: svn commit: r1900446 - in /spamassassin/trunk/lib/Mail/SpamAssassin: Message/Node.pm Plugin/DecodeShortURLs.pm Plugin/OLEVBMacro.pm

Posted by "Kevin A. McGrail" <km...@apache.org>.
On 5/1/2022 6:23 PM, Bill Cole wrote:
> It helps to understand that detect_utf16() is called in exactly one 
> place: in another part of Node.pm. It is called there in scalar 
> context. In scalar context, 'return' without an argument returns 
> undef. There's no logical reason for it to ever be called in list 
> context, since what it is returning is (a pointer to) an Encode object. 
And a pointer isn't going to cause any heavy computation.  Perhaps just 
a cleaner comment then and we are good or just ignore it because we have 
bigger fish to fry!

-- 
Kevin A. McGrail
KMcGrail@Apache.org

Member, Apache Software Foundation
Chair Emeritus Apache SpamAssassin Project
https://www.linkedin.com/in/kmcgrail - 703.798.0171


Re: svn commit: r1900446 - in /spamassassin/trunk/lib/Mail/SpamAssassin: Message/Node.pm Plugin/DecodeShortURLs.pm Plugin/OLEVBMacro.pm

Posted by Bill Cole <bi...@apache.org>.
On 2022-05-01 at 16:33:13 UTC-0400 (Sun, 1 May 2022 16:33:13 -0400)
Kevin A. McGrail <km...@apache.org>
is rumored to have said:

> On 5/1/2022 4:12 PM, Michael Storz wrote:
>> Kevin, the change from 'return undef' to "return" is correct because 
>> return returns undef in the scalar context. "return undef" should 
>> only be used when evaluated in the array context and the value undef 
>> is needed instead of ().
>>
>> I only looked at the case for detect_utf16. If the change is ok for 
>> the other returns, you still have to investigate if you don't trust 
>> the change.
> It's not a matter of trust, it's a matter of changing return undef 
> where there is a comment that says "# let perl figure it out from the 
> BOM".  That comment worries me that the return undef was purposeful.

I don't see how the explicit 'return undef' can have any specific 
relationship to that comment. The code checks for a UTF16 BOM and if it 
finds one, does not bother trying to detect whether the data is LE or 
BE, because with a BOM, Perl (Encode::Detect::Detector) determines that 
without help when asked to, later in the block that called 
detect_utf16(). Without a BOM, detect_utf16() is needed to look at the 
data and try to guess the endianness. With a BOM, detect_utf16() is a 
no-op and ultimately Encode::Detect::Detector gets used (and should work 
just fine.)

It helps to understand that detect_utf16() is called in exactly one 
place: in another part of Node.pm. It is called there in scalar context. 
In scalar context, 'return' without an argument returns undef. There's 
no logical reason for it to ever be called in list context, since what 
it is returning is (a pointer to) an Encode object.

Re: svn commit: r1900446 - in /spamassassin/trunk/lib/Mail/SpamAssassin: Message/Node.pm Plugin/DecodeShortURLs.pm Plugin/OLEVBMacro.pm

Posted by "Kevin A. McGrail" <km...@apache.org>.
On 5/1/2022 4:12 PM, Michael Storz wrote:
> Kevin, the change from 'return undef' to "return" is correct because 
> return returns undef in the scalar context. "return undef" should only 
> be used when evaluated in the array context and the value undef is 
> needed instead of ().
>
> I only looked at the case for detect_utf16. If the change is ok for 
> the other returns, you still have to investigate if you don't trust 
> the change. 
It's not a matter of trust, it's a matter of changing return undef where 
there is a comment that says "# let perl figure it out from the BOM".  
That comment worries me that the return undef was purposeful.

-- 
Kevin A. McGrail
KMcGrail@Apache.org

Member, Apache Software Foundation
Chair Emeritus Apache SpamAssassin Project
https://www.linkedin.com/in/kmcgrail - 703.798.0171


Re: svn commit: r1900446 - in /spamassassin/trunk/lib/Mail/SpamAssassin: Message/Node.pm Plugin/DecodeShortURLs.pm Plugin/OLEVBMacro.pm

Posted by Michael Storz <Mi...@lrz.de>.
Am 2022-05-01 20:02, schrieb Kevin A. McGrail:
> On 5/1/2022 1:28 PM, Michael Storz wrote:
>> Am 2022-05-01 18:22, schrieb Kevin A. McGrail:
>>> Morning Hege,
>>> 
>>> This change worries me.  What does the comment "let per figure it out
>>> from the BOM" mean and does the change on the return undef change
>>> that?
>>> 
>>> Worried that Perl Critic is complaining about something that was done
>>> on purpose.
>>> 
>>> Do we have a good test case for this change?  if not can we 
>>> synthesize
>>> a spample and add a test for it?
>>> 
>>> Regards,
>>> KAM
>>> 
>>> On 5/1/2022 5:25 AM, hege@apache.org wrote:
>>>> Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm
>>>> URL:http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm?rev=1900446&r1=1900445&r2=1900446&view=diff 
>>>> ============================================================================== 
>>>> --- spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm 
>>>> (original)
>>>> +++ spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm Sun 
>>>> May  1 09:25:11 2022
>>>> @@ -389,7 +389,7 @@ sub detect_utf16 {
>>>>       # avoid scan if BOM present
>>>>       if( $data =~ /^(?:\xff\xfe|\xfe\xff)/ ) {
>>>>           dbg( "message: detect_utf16: found BOM" );
>>>> -        return undef;    # let perl figure it out from the BOM
>>>> +        return;    # let perl figure it out from the BOM
>>>>       }
>>>> 
>> 
>> Kevin,
>> 
>> using "return undef" on purpose means the subroutine is called in list 
>> context and instead of an empty list you want undef.
>> 
>> grep -r detect_utf16 lib/Mail/ shows
>> 
>> lib/Mail/SpamAssassin/Message/Node.pm:sub detect_utf16 {
>> lib/Mail/SpamAssassin/Message/Node.pm:        dbg( "message: 
>> detect_utf16: found BOM" );
>> lib/Mail/SpamAssassin/Message/Node.pm:        dbg( "message: 
>> detect_utf16: UTF-16LE" );
>> lib/Mail/SpamAssassin/Message/Node.pm:        dbg( "message: 
>> detect_utf16: UTF-16BE" );
>> lib/Mail/SpamAssassin/Message/Node.pm:        dbg( "message: 
>> detect_utf16: Could not detect UTF-16 endianness" );
>> lib/Mail/SpamAssassin/Message/Node.pm:    my $decoder = detect_utf16( 
>> $_[0] );
>> 
>> which means detect_utf16 is called once in scalar context. That means 
>> the change is correct.
>> 
>> Michael
> 
> Michael, Hege's change removed return undef; which I think might be
> wanted in a few cases and that PerlCritic is a FP in this patch.  You
> seem to be agreeing that return undef is warranted which is why I
> raised the issue here.  Can you confirm that is what you are saying?
> 
> Regards,
> 
> KAM

Kevin, the change from 'return undef' to "return" is correct because 
return returns undef in the scalar context. "return undef" should only 
be used when evaluated in the array context and the value undef is 
needed instead of ().

I only looked at the case for detect_utf16. If the change is ok for the 
other returns, you still have to investigate if you don't trust the 
change.

Regards,
Michael

Re: svn commit: r1900446 - in /spamassassin/trunk/lib/Mail/SpamAssassin: Message/Node.pm Plugin/DecodeShortURLs.pm Plugin/OLEVBMacro.pm

Posted by "Kevin A. McGrail" <km...@apache.org>.
On 5/1/2022 1:28 PM, Michael Storz wrote:
> Am 2022-05-01 18:22, schrieb Kevin A. McGrail:
>> Morning Hege,
>>
>> This change worries me.  What does the comment "let per figure it out
>> from the BOM" mean and does the change on the return undef change
>> that?
>>
>> Worried that Perl Critic is complaining about something that was done
>> on purpose.
>>
>> Do we have a good test case for this change?  if not can we synthesize
>> a spample and add a test for it?
>>
>> Regards,
>> KAM
>>
>> On 5/1/2022 5:25 AM, hege@apache.org wrote:
>>> Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm
>>> URL:http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm?rev=1900446&r1=1900445&r2=1900446&view=diff 
>>>
>>> ============================================================================== 
>>>
>>> --- spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm (original)
>>> +++ spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm Sun 
>>> May  1 09:25:11 2022
>>> @@ -389,7 +389,7 @@ sub detect_utf16 {
>>>       # avoid scan if BOM present
>>>       if( $data =~ /^(?:\xff\xfe|\xfe\xff)/ ) {
>>>           dbg( "message: detect_utf16: found BOM" );
>>> -        return undef;    # let perl figure it out from the BOM
>>> +        return;    # let perl figure it out from the BOM
>>>       }
>>>
>
> Kevin,
>
> using "return undef" on purpose means the subroutine is called in list 
> context and instead of an empty list you want undef.
>
> grep -r detect_utf16 lib/Mail/ shows
>
> lib/Mail/SpamAssassin/Message/Node.pm:sub detect_utf16 {
> lib/Mail/SpamAssassin/Message/Node.pm:        dbg( "message: 
> detect_utf16: found BOM" );
> lib/Mail/SpamAssassin/Message/Node.pm:        dbg( "message: 
> detect_utf16: UTF-16LE" );
> lib/Mail/SpamAssassin/Message/Node.pm:        dbg( "message: 
> detect_utf16: UTF-16BE" );
> lib/Mail/SpamAssassin/Message/Node.pm:        dbg( "message: 
> detect_utf16: Could not detect UTF-16 endianness" );
> lib/Mail/SpamAssassin/Message/Node.pm:    my $decoder = detect_utf16( 
> $_[0] );
>
> which means detect_utf16 is called once in scalar context. That means 
> the change is correct.
>
> Michael

Michael, Hege's change removed return undef; which I think might be 
wanted in a few cases and that PerlCritic is a FP in this patch.  You 
seem to be agreeing that return undef is warranted which is why I raised 
the issue here.  Can you confirm that is what you are saying?

Regards,

KAM

-- 
Kevin A. McGrail
KMcGrail@Apache.org

Member, Apache Software Foundation
Chair Emeritus Apache SpamAssassin Project
https://www.linkedin.com/in/kmcgrail - 703.798.0171


Re: svn commit: r1900446 - in /spamassassin/trunk/lib/Mail/SpamAssassin: Message/Node.pm Plugin/DecodeShortURLs.pm Plugin/OLEVBMacro.pm

Posted by Michael Storz <Mi...@lrz.de>.
Am 2022-05-01 18:22, schrieb Kevin A. McGrail:
> Morning Hege,
> 
> This change worries me.  What does the comment "let per figure it out
> from the BOM" mean and does the change on the return undef change
> that?
> 
> Worried that Perl Critic is complaining about something that was done
> on purpose.
> 
> Do we have a good test case for this change?  if not can we synthesize
> a spample and add a test for it?
> 
> Regards,
> KAM
> 
> On 5/1/2022 5:25 AM, hege@apache.org wrote:
>> Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm
>> URL:http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm?rev=1900446&r1=1900445&r2=1900446&view=diff
>> ==============================================================================
>> --- spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm 
>> (original)
>> +++ spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm Sun May  
>> 1 09:25:11 2022
>> @@ -389,7 +389,7 @@ sub detect_utf16 {
>>   	# avoid scan if BOM present
>>   	if( $data =~ /^(?:\xff\xfe|\xfe\xff)/ ) {
>>   		dbg( "message: detect_utf16: found BOM" );
>> -		return undef;	# let perl figure it out from the BOM
>> +		return;	# let perl figure it out from the BOM
>>   	}
>> 

Kevin,

using "return undef" on purpose means the subroutine is called in list 
context and instead of an empty list you want undef.

grep -r detect_utf16 lib/Mail/ shows

lib/Mail/SpamAssassin/Message/Node.pm:sub detect_utf16 {
lib/Mail/SpamAssassin/Message/Node.pm:		dbg( "message: detect_utf16: 
found BOM" );
lib/Mail/SpamAssassin/Message/Node.pm:		dbg( "message: detect_utf16: 
UTF-16LE" );
lib/Mail/SpamAssassin/Message/Node.pm:		dbg( "message: detect_utf16: 
UTF-16BE" );
lib/Mail/SpamAssassin/Message/Node.pm:		dbg( "message: detect_utf16: 
Could not detect UTF-16 endianness" );
lib/Mail/SpamAssassin/Message/Node.pm:    my $decoder = detect_utf16( 
$_[0] );

which means detect_utf16 is called once in scalar context. That means 
the change is correct.

Michael

Re: svn commit: r1900446 - in /spamassassin/trunk/lib/Mail/SpamAssassin: Message/Node.pm Plugin/DecodeShortURLs.pm Plugin/OLEVBMacro.pm

Posted by "Kevin A. McGrail" <km...@apache.org>.
Morning Hege,

This change worries me.  What does the comment "let per figure it out 
from the BOM" mean and does the change on the return undef change that?

Worried that Perl Critic is complaining about something that was done on 
purpose.

Do we have a good test case for this change?  if not can we synthesize a 
spample and add a test for it?

Regards,
KAM

On 5/1/2022 5:25 AM, hege@apache.org wrote:
> Modified: spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm
> URL:http://svn.apache.org/viewvc/spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm?rev=1900446&r1=1900445&r2=1900446&view=diff
> ==============================================================================
> --- spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm (original)
> +++ spamassassin/trunk/lib/Mail/SpamAssassin/Message/Node.pm Sun May  1 09:25:11 2022
> @@ -389,7 +389,7 @@ sub detect_utf16 {
>   	# avoid scan if BOM present
>   	if( $data =~ /^(?:\xff\xfe|\xfe\xff)/ ) {
>   		dbg( "message: detect_utf16: found BOM" );
> -		return undef;	# let perl figure it out from the BOM
> +		return;	# let perl figure it out from the BOM
>   	}
>   	

-- 
Kevin A. McGrail
KMcGrail@Apache.org

Member, Apache Software Foundation
Chair Emeritus Apache SpamAssassin Project
https://www.linkedin.com/in/kmcgrail - 703.798.0171