[PATCH v2] checkpatch: improve email parsing

From: Dwaipayan Ray
Date: Tue Nov 03 2020 - 00:48:42 EST


checkpatch doesn't report warnings for many common mistakes
in emails, such as trailing commas and incorrect use of
email comments.

At the same time, several false positives are reported due to
incorrect handling of mail comments, the most common of which
is caused by the pattern:

<stable@xxxxxxxxxxxxxxx> # X.X

Improve the email parsing mechanism in checkpatch.

What is added:

- Support for multiple name/address comments.
- Improved handling of quoted names.
- Sanitize improperly formatted comments.
- Sanitize trailing comma or dot after email.

Signed-off-by: Dwaipayan Ray <dwaipayanray1@xxxxxxxxx>
---
scripts/checkpatch.pl | 44 +++++++++++++++++++++++++++++--------------
1 file changed, 30 insertions(+), 14 deletions(-)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index fab38b493cef..9a9049480077 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -1152,6 +1152,7 @@ sub parse_email {
my ($formatted_email) = @_;

my $name = "";
+ my $quoted = "";
my $name_comment = "";
my $address = "";
my $comment = "";
@@ -1183,14 +1184,25 @@ sub parse_email {
}
}

- $comment = trim($comment);
- $name = trim($name);
- $name =~ s/^\"|\"$//g;
- if ($name =~ s/(\s*\([^\)]+\))\s*//) {
- $name_comment = trim($1);
+ # Extract comments from names excluding quoted parts
+ # "John A. (Kennedy)" - Do not extract
+ if ($name =~ s/\"(.+)\"//) {
+ $quoted = $1;
+ }
+ while ($name =~ s/\s*($balanced_parens)\s*/ /) {
+ $name_comment .= trim($1);
+ }
+ $name =~ s/^[ \"]|[ \"]$//g;
+ $name = trim("$quoted $name");
+
+ # Extract comments from address
+ # <john(his mail)@doe.com>
+ while ($address =~ s/\s*($balanced_parens)\s*//) {
+ $comment .= trim($1);
}
$address = trim($address);
$address =~ s/^\<|\>$//g;
+ $comment = trim($comment);

if ($name =~ /[^\w \-]/i) { ##has "must quote" chars
$name =~ s/(?<!\\)"/\\"/g; ##escape quotes
@@ -1205,17 +1217,25 @@ sub format_email {

my $formatted_email;

- $name_comment = trim($name_comment);
- $comment = trim($comment);
$name = trim($name);
$name =~ s/^\"|\"$//g;
$address = trim($address);
+ $address =~ s/(?:\.|\,)*$//; ##trailing commas or dots

if ($name =~ /[^\w \-]/i) { ##has "must quote" chars
$name =~ s/(?<!\\)"/\\"/g; ##escape quotes
$name = "\"$name\"";
}

+ $name_comment = trim($name_comment);
+ $name_comment =~ s/(.+)/ $1/;
+
+ # Sanitize comment
+ $comment = trim($comment);
+ if ($comment ne "" && $comment !~ s/^\s*((?:\#|\(|\/|\[).*)/ $1/) {
+ $comment = "";
+ }
+
if ("$name" eq "") {
$formatted_email = "$address";
} else {
@@ -1233,15 +1253,11 @@ sub reformat_email {
}

sub same_email_addresses {
- my ($email1, $email2, $match_comment) = @_;
+ my ($email1, $email2) = @_;

my ($email1_name, $name1_comment, $email1_address, $comment1) = parse_email($email1);
my ($email2_name, $name2_comment, $email2_address, $comment2) = parse_email($email2);

- if ($match_comment != 1) {
- return $email1_name eq $email2_name &&
- $email1_address eq $email2_address;
- }
return $email1_name eq $email2_name &&
$email1_address eq $email2_address &&
$name1_comment eq $name2_comment &&
@@ -2704,7 +2720,7 @@ sub process {
$signoff++;
$in_commit_log = 0;
if ($author ne '' && $authorsignoff != 1) {
- if (same_email_addresses($1, $author, 1)) {
+ if (same_email_addresses($1, $author)) {
$authorsignoff = 1;
} else {
my $ctx = $1;
@@ -2800,7 +2816,7 @@ sub process {
$dequoted =~ s/" </ </;
# Don't force email to have quotes
# Allow just an angle bracketed address
- if (!same_email_addresses($email, $suggested_email, 0)) {
+ if (!same_email_addresses($email, $suggested_email)) {
WARN("BAD_SIGN_OFF",
"email address '$email' might be better as '$suggested_email'\n" . $herecurr);
}
--
2.27.0