scripts/get_maintainer.pl: use .get_maintainer.conf from . then $HOME then scripts
[linux-block.git] / scripts / get_maintainer.pl
CommitLineData
cb7301c7
JP
1#!/usr/bin/perl -w
2# (c) 2007, Joe Perches <joe@perches.com>
3# created from checkpatch.pl
4#
5# Print selected MAINTAINERS information for
6# the files modified in a patch or for a file
7#
3bd7bf5f
RK
8# usage: perl scripts/get_maintainer.pl [OPTIONS] <patch>
9# perl scripts/get_maintainer.pl [OPTIONS] -f <file>
cb7301c7
JP
10#
11# Licensed under the terms of the GNU GPL License version 2
12
13use strict;
14
15my $P = $0;
63ab52db 16my $V = '0.25';
cb7301c7
JP
17
18use Getopt::Long qw(:config no_auto_abbrev);
19
20my $lk_path = "./";
21my $email = 1;
22my $email_usename = 1;
23my $email_maintainer = 1;
24my $email_list = 1;
25my $email_subscriber_list = 0;
cb7301c7 26my $email_git_penguin_chiefs = 0;
e3e9d114 27my $email_git = 0;
0fa05599 28my $email_git_all_signature_types = 0;
60db31ac 29my $email_git_blame = 0;
e3e9d114 30my $email_git_fallback = 1;
cb7301c7
JP
31my $email_git_min_signatures = 1;
32my $email_git_max_maintainers = 5;
afa81ee1 33my $email_git_min_percent = 5;
cb7301c7 34my $email_git_since = "1-year-ago";
60db31ac 35my $email_hg_since = "-365";
11ecf53c 36my $email_remove_duplicates = 1;
cb7301c7
JP
37my $output_multiline = 1;
38my $output_separator = ", ";
3c7385b8
JP
39my $output_roles = 0;
40my $output_rolestats = 0;
cb7301c7
JP
41my $scm = 0;
42my $web = 0;
43my $subsystem = 0;
44my $status = 0;
dcf36a92 45my $keywords = 1;
4b76c9da 46my $sections = 0;
03372dbb 47my $file_emails = 0;
4a7fdb5f 48my $from_filename = 0;
3fb55652 49my $pattern_depth = 0;
cb7301c7
JP
50my $version = 0;
51my $help = 0;
52
53my $exit = 0;
54
55my @penguin_chief = ();
e4d26b02 56push(@penguin_chief, "Linus Torvalds:torvalds\@linux-foundation.org");
cb7301c7 57#Andrew wants in on most everything - 2009/01/14
e4d26b02 58#push(@penguin_chief, "Andrew Morton:akpm\@linux-foundation.org");
cb7301c7
JP
59
60my @penguin_chief_names = ();
61foreach my $chief (@penguin_chief) {
62 if ($chief =~ m/^(.*):(.*)/) {
63 my $chief_name = $1;
64 my $chief_addr = $2;
65 push(@penguin_chief_names, $chief_name);
66 }
67}
e4d26b02
JP
68my $penguin_chiefs = "\(" . join("|", @penguin_chief_names) . "\)";
69
70# Signature types of people who are either
71# a) responsible for the code in question, or
72# b) familiar enough with it to give relevant feedback
73my @signature_tags = ();
74push(@signature_tags, "Signed-off-by:");
75push(@signature_tags, "Reviewed-by:");
76push(@signature_tags, "Acked-by:");
77my $signaturePattern = "\(" . join("|", @signature_tags) . "\)";
cb7301c7 78
5f2441e9 79# rfc822 email address - preloaded methods go here.
1b5e1cf6 80my $rfc822_lwsp = "(?:(?:\\r\\n)?[ \\t])";
df4cc036 81my $rfc822_char = '[\\000-\\377]';
1b5e1cf6 82
60db31ac
JP
83# VCS command support: class-like functions and strings
84
85my %VCS_cmds;
86
87my %VCS_cmds_git = (
88 "execute_cmd" => \&git_execute_cmd,
89 "available" => '(which("git") ne "") && (-d ".git")',
99cf6116
RK
90 "find_signers_cmd" => "git log --no-color --since=\$email_git_since -- \$file",
91 "find_commit_signers_cmd" => "git log --no-color -1 \$commit",
63ab52db 92 "find_commit_author_cmd" => "git log -1 --format=\"%an <%ae>\" \$commit",
60db31ac
JP
93 "blame_range_cmd" => "git blame -l -L \$diff_start,+\$diff_length \$file",
94 "blame_file_cmd" => "git blame -l \$file",
95 "commit_pattern" => "^commit [0-9a-f]{40,40}",
96 "blame_commit_pattern" => "^([0-9a-f]+) "
97);
98
99my %VCS_cmds_hg = (
100 "execute_cmd" => \&hg_execute_cmd,
101 "available" => '(which("hg") ne "") && (-d ".hg")',
102 "find_signers_cmd" =>
103 "hg log --date=\$email_hg_since" .
104 " --template='commit {node}\\n{desc}\\n' -- \$file",
105 "find_commit_signers_cmd" => "hg log --template='{desc}\\n' -r \$commit",
63ab52db 106 "find_commit_author_cmd" => "hg log -l 1 --template='{author}\\n' -r \$commit",
60db31ac
JP
107 "blame_range_cmd" => "", # not supported
108 "blame_file_cmd" => "hg blame -c \$file",
109 "commit_pattern" => "^commit [0-9a-f]{40,40}",
110 "blame_commit_pattern" => "^([0-9a-f]+):"
111);
112
bcde44ed
JP
# Load default options from the first .get_maintainer.conf located by
# which_conf().  Every whitespace-separated word of the file is treated as a
# command line argument and prepended to @ARGV so that options given on the
# real command line are parsed later and therefore win.
my $conf = which_conf(".get_maintainer.conf");
if (-f $conf) {
    my @conf_args;

    # Only read the file when it could actually be opened; previously a
    # failed open() merely warned and then read from an unopened handle.
    if (open(my $conffile, '<', $conf)) {
        while (my $line = <$conffile>) {
            $line =~ s/\s*\n?$//g;
            $line =~ s/^\s*//g;
            $line =~ s/\s+/ /g;

            # Skip whole-line comments and blank lines.
            next if ($line =~ m/^\s*#/);
            next if ($line =~ m/^\s*$/);

            # A word starting with '#' begins a trailing comment.
            foreach my $word (split(" ", $line)) {
                last if ($word =~ m/^#/);
                push(@conf_args, $word);
            }
        }
        close($conffile);
    } else {
        warn "$P: Can't find a readable .get_maintainer.conf file $!\n";
    }
    unshift(@ARGV, @conf_args) if @conf_args;
}
138
cb7301c7
JP
139if (!GetOptions(
140 'email!' => \$email,
141 'git!' => \$email_git,
e4d26b02 142 'git-all-signature-types!' => \$email_git_all_signature_types,
60db31ac 143 'git-blame!' => \$email_git_blame,
e3e9d114 144 'git-fallback!' => \$email_git_fallback,
cb7301c7
JP
145 'git-chief-penguins!' => \$email_git_penguin_chiefs,
146 'git-min-signatures=i' => \$email_git_min_signatures,
147 'git-max-maintainers=i' => \$email_git_max_maintainers,
afa81ee1 148 'git-min-percent=i' => \$email_git_min_percent,
cb7301c7 149 'git-since=s' => \$email_git_since,
60db31ac 150 'hg-since=s' => \$email_hg_since,
11ecf53c 151 'remove-duplicates!' => \$email_remove_duplicates,
cb7301c7
JP
152 'm!' => \$email_maintainer,
153 'n!' => \$email_usename,
154 'l!' => \$email_list,
155 's!' => \$email_subscriber_list,
156 'multiline!' => \$output_multiline,
3c7385b8
JP
157 'roles!' => \$output_roles,
158 'rolestats!' => \$output_rolestats,
cb7301c7
JP
159 'separator=s' => \$output_separator,
160 'subsystem!' => \$subsystem,
161 'status!' => \$status,
162 'scm!' => \$scm,
163 'web!' => \$web,
3fb55652 164 'pattern-depth=i' => \$pattern_depth,
dcf36a92 165 'k|keywords!' => \$keywords,
4b76c9da 166 'sections!' => \$sections,
03372dbb 167 'fe|file-emails!' => \$file_emails,
4a7fdb5f 168 'f|file' => \$from_filename,
cb7301c7 169 'v|version' => \$version,
64f77f31 170 'h|help|usage' => \$help,
cb7301c7 171 )) {
3c7385b8 172 die "$P: invalid argument - use --help if necessary\n";
cb7301c7
JP
173}
174
175if ($help != 0) {
176 usage();
177 exit 0;
178}
179
180if ($version != 0) {
181 print("${P} ${V}\n");
182 exit 0;
183}
184
64f77f31
JP
185if (-t STDIN && !@ARGV) {
186 # We're talking to a terminal, but have no command line arguments.
187 die "$P: missing patchfile or -f file - use --help if necessary\n";
cb7301c7
JP
188}
189
42498316
JP
190if ($output_separator ne ", ") {
191 $output_multiline = 0;
192}
193
3c7385b8
JP
194if ($output_rolestats) {
195 $output_roles = 1;
196}
197
4b76c9da
JP
198if ($sections) {
199 $email = 0;
200 $email_list = 0;
201 $scm = 0;
202 $status = 0;
203 $subsystem = 0;
204 $web = 0;
205 $keywords = 0;
206} else {
207 my $selections = $email + $scm + $status + $subsystem + $web;
208 if ($selections == 0) {
4b76c9da
JP
209 die "$P: Missing required option: email, scm, status, subsystem or web\n";
210 }
cb7301c7
JP
211}
212
f5492666
JP
213if ($email &&
214 ($email_maintainer + $email_list + $email_subscriber_list +
215 $email_git + $email_git_penguin_chiefs + $email_git_blame) == 0) {
cb7301c7
JP
216 die "$P: Please select at least 1 email option\n";
217}
218
219if (!top_of_kernel_tree($lk_path)) {
220 die "$P: The current directory does not appear to be "
221 . "a linux kernel source tree.\n";
222}
223
e4d26b02
JP
224if ($email_git_all_signature_types) {
225 $signaturePattern = "(.+?)[Bb][Yy]:";
226}
227
cb7301c7
JP
228## Read MAINTAINERS for type/value pairs
229
230my @typevalue = ();
dcf36a92
JP
231my %keyword_hash;
232
22dd5b0c
SH
# Read MAINTAINERS into @typevalue.  "X: value" lines are stored as
# "X:value" (F:/X: file globs are first converted to regular expressions);
# any other non-blank line is stored verbatim without its newline.
# K: values are additionally recorded in %keyword_hash, keyed by the
# @typevalue index the entry is about to occupy.
open (my $maint, '<', "${lk_path}MAINTAINERS")
    or die "$P: Can't open MAINTAINERS: $!\n";
while (my $line = <$maint>) {

    # '.' replaces the former '\C', which perl 5.24 and later reject.
    if ($line =~ m/^(.):\s*(.*)/) {
        my $type = $1;
        my $value = $2;

        ##Filename pattern matching
        if ($type eq "F" || $type eq "X") {
            $value =~ s@\.@\\\.@g;       ##Convert . to \.
            $value =~ s/\*/\.\*/g;       ##Convert * to .*
            $value =~ s/\?/\./g;         ##Convert ? to .
            ##if pattern is a directory and it lacks a trailing slash, add one
            if ((-d $value)) {
                $value =~ s@([^/])$@$1/@;
            }
        } elsif ($type eq "K") {
            # Key is the current element count, i.e. the index of the
            # entry pushed just below.
            $keyword_hash{scalar(@typevalue)} = $value;
        }
        push(@typevalue, "$type:$value");
    } elsif ($line !~ m/^(\s)*$/) {
        $line =~ s/\n$//g;
        push(@typevalue, $line);
    }
}
close($maint);
cb7301c7 261
8cbb3a77
JP
262my %mailmap;
263
# When duplicate removal is enabled, read .mailmap and collect, per parsed
# name, the list of addresses seen for that name in %mailmap.
if ($email_remove_duplicates) {
    # A missing or unreadable .mailmap is only worth a warning; do not go on
    # to read from (and close) a handle that was never opened.
    if (open(my $mailmap_fh, '<', "${lk_path}.mailmap")) {
        while (my $line = <$mailmap_fh>) {
            next if ($line =~ m/^\s*#/);
            next if ($line =~ m/^\s*$/);

            my ($name, $address) = parse_email($line);
            $line = format_email($name, $address, $email_usename);

            # Nothing usable could be parsed from this line.
            next if ($line =~ m/^\s*$/);

            # Autovivifies the per-name address list on first use.
            push(@{$mailmap{$name}}, $address);
        }
        close($mailmap_fh);
    } else {
        warn "$P: Can't open .mailmap: $!\n";
    }
}
288
4a7fdb5f 289## use the filenames on the command line or find the filenames in the patchfiles
cb7301c7
JP
290
291my @files = ();
f5492666 292my @range = ();
dcf36a92 293my @keyword_tvi = ();
03372dbb 294my @file_emails = ();
cb7301c7 295
64f77f31
JP
296if (!@ARGV) {
297 push(@ARGV, "&STDIN");
298}
299
4a7fdb5f 300foreach my $file (@ARGV) {
64f77f31
JP
301 if ($file ne "&STDIN") {
302 ##if $file is a directory and it lacks a trailing slash, add one
303 if ((-d $file)) {
304 $file =~ s@([^/])$@$1/@;
305 } elsif (!(-f $file)) {
306 die "$P: file '${file}' not found\n";
307 }
cb7301c7 308 }
4a7fdb5f
JP
309 if ($from_filename) {
310 push(@files, $file);
fab9ed12 311 if ($file ne "MAINTAINERS" && -f $file && ($keywords || $file_emails)) {
22dd5b0c
SH
312 open(my $f, '<', $file)
313 or die "$P: Can't open $file: $!\n";
314 my $text = do { local($/) ; <$f> };
315 close($f);
03372dbb
JP
316 if ($keywords) {
317 foreach my $line (keys %keyword_hash) {
318 if ($text =~ m/$keyword_hash{$line}/x) {
319 push(@keyword_tvi, $line);
320 }
dcf36a92
JP
321 }
322 }
03372dbb
JP
323 if ($file_emails) {
324 my @poss_addr = $text =~ m$[A-Za-zÀ-ÿ\"\' \,\.\+-]*\s*[\,]*\s*[\(\<\{]{0,1}[A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+\.[A-Za-z0-9]+[\)\>\}]{0,1}$g;
325 push(@file_emails, clean_file_emails(@poss_addr));
326 }
dcf36a92 327 }
4a7fdb5f
JP
328 } else {
329 my $file_cnt = @files;
f5492666 330 my $lastfile;
22dd5b0c 331
3a4df13d 332 open(my $patch, "< $file")
22dd5b0c
SH
333 or die "$P: Can't open $file: $!\n";
334 while (<$patch>) {
dcf36a92 335 my $patch_line = $_;
4a7fdb5f
JP
336 if (m/^\+\+\+\s+(\S+)/) {
337 my $filename = $1;
338 $filename =~ s@^[^/]*/@@;
339 $filename =~ s@\n@@;
f5492666 340 $lastfile = $filename;
4a7fdb5f 341 push(@files, $filename);
f5492666
JP
342 } elsif (m/^\@\@ -(\d+),(\d+)/) {
343 if ($email_git_blame) {
344 push(@range, "$lastfile:$1:$2");
345 }
dcf36a92
JP
346 } elsif ($keywords) {
347 foreach my $line (keys %keyword_hash) {
348 if ($patch_line =~ m/^[+-].*$keyword_hash{$line}/x) {
349 push(@keyword_tvi, $line);
350 }
351 }
4a7fdb5f 352 }
cb7301c7 353 }
22dd5b0c
SH
354 close($patch);
355
4a7fdb5f 356 if ($file_cnt == @files) {
7f29fd27 357 warn "$P: file '${file}' doesn't appear to be a patch. "
4a7fdb5f
JP
358 . "Add -f to options?\n";
359 }
360 @files = sort_and_uniq(@files);
cb7301c7 361 }
cb7301c7
JP
362}
363
03372dbb
JP
364@file_emails = uniq(@file_emails);
365
cb7301c7 366my @email_to = ();
290603c1 367my @list_to = ();
cb7301c7
JP
368my @scm = ();
369my @web = ();
370my @subsystem = ();
371my @status = ();
372
373# Find responsible parties
374
# For every file, walk all MAINTAINERS sections, record the sections whose
# F: patterns match (unless an X: pattern of the same section excludes the
# file), then add the categories of the matching sections, deepest pattern
# first.  Git/hg history is consulted when requested, or as a fallback when
# no pattern matched the file exactly.
foreach my $file (@files) {

    my %hash;                       # section index => pattern depth
    my $exact_pattern_match = 0;
    my $tvi = find_first_section();
    while ($tvi < @typevalue) {
        my $start = find_starting_index($tvi);
        my $end = find_ending_index($tvi);
        my $exclude = 0;
        my $i;

        #Do not match excluded file patterns

        for ($i = $start; $i < $end; $i++) {
            my $line = $typevalue[$i];
            # '.' replaces '\C', which perl 5.24 and later reject.
            if ($line =~ m/^(.):\s*(.*)/) {
                my $type = $1;
                my $value = $2;
                if ($type eq 'X') {
                    if (file_match_pattern($file, $value)) {
                        $exclude = 1;
                        last;
                    }
                }
            }
        }

        if (!$exclude) {
            for ($i = $start; $i < $end; $i++) {
                my $line = $typevalue[$i];
                if ($line =~ m/^(.):\s*(.*)/) {
                    my $type = $1;
                    my $value = $2;
                    if ($type eq 'F') {
                        if (file_match_pattern($file, $value)) {
                            # Depth is measured in '/' separators.
                            my $value_pd = ($value =~ tr@/@@);
                            my $file_pd = ($file =~ tr@/@@);
                            $value_pd++ if (substr($value,-1,1) ne "/");
                            $value_pd = -1 if ($value =~ /^\.\*/);
                            $exact_pattern_match = 1 if ($value_pd >= $file_pd);
                            if ($pattern_depth == 0 ||
                                (($file_pd - $value_pd) < $pattern_depth)) {
                                $hash{$tvi} = $value_pd;
                            }
                        }
                    }
                }
            }
        }

        $tvi = $end + 1;
    }

    foreach my $section (sort {$hash{$b} <=> $hash{$a}} keys %hash) {
        add_categories($section);
        if ($sections) {
            my $i;
            my $start = find_starting_index($section);
            my $end = find_ending_index($section);
            for ($i = $start; $i < $end; $i++) {
                my $line = $typevalue[$i];
                if ($line =~ /^[FX]:/) {                ##Restore file patterns
                    $line =~ s/([^\\])\.([^\*])/$1\?$2/g;
                    $line =~ s/([^\\])\.$/$1\?/g;       ##Convert . back to ?
                    $line =~ s/\\\./\./g;               ##Convert \. to .
                    $line =~ s/\.\*/\*/g;               ##Convert .* to *
                }
                $line =~ s/^([A-Z]):/$1:\t/g;
                print("$line\n");
            }
            print("\n");
        }
    }

    if ($email &&
        ($email_git || ($email_git_fallback && !$exact_pattern_match))) {
        vcs_file_signoffs($file);
    }

    if ($email && $email_git_blame) {
        vcs_file_blame($file);
    }
}
458
dcf36a92
JP
459if ($keywords) {
460 @keyword_tvi = sort_and_uniq(@keyword_tvi);
461 foreach my $line (@keyword_tvi) {
462 add_categories($line);
463 }
464}
465
f5f5078d 466if ($email) {
cb7301c7
JP
467 foreach my $chief (@penguin_chief) {
468 if ($chief =~ m/^(.*):(.*)/) {
f5f5078d 469 my $email_address;
0e70e83d 470
a8af2430 471 $email_address = format_email($1, $2, $email_usename);
f5f5078d 472 if ($email_git_penguin_chiefs) {
3c7385b8 473 push(@email_to, [$email_address, 'chief penguin']);
f5f5078d 474 } else {
3c7385b8 475 @email_to = grep($_->[0] !~ /${email_address}/, @email_to);
cb7301c7
JP
476 }
477 }
478 }
03372dbb
JP
479
480 foreach my $email (@file_emails) {
481 my ($name, $address) = parse_email($email);
482
483 my $tmp_email = format_email($name, $address, $email_usename);
484 push_email_address($tmp_email, '');
485 add_role($tmp_email, 'in file');
486 }
cb7301c7
JP
487}
488
290603c1
JP
489if ($email || $email_list) {
490 my @to = ();
491 if ($email) {
492 @to = (@to, @email_to);
cb7301c7 493 }
290603c1 494 if ($email_list) {
290603c1 495 @to = (@to, @list_to);
290603c1 496 }
3c7385b8 497 output(merge_email(@to));
cb7301c7
JP
498}
499
500if ($scm) {
b781655a 501 @scm = uniq(@scm);
cb7301c7
JP
502 output(@scm);
503}
504
505if ($status) {
b781655a 506 @status = uniq(@status);
cb7301c7
JP
507 output(@status);
508}
509
510if ($subsystem) {
b781655a 511 @subsystem = uniq(@subsystem);
cb7301c7
JP
512 output(@subsystem);
513}
514
515if ($web) {
b781655a 516 @web = uniq(@web);
cb7301c7
JP
517 output(@web);
518}
519
520exit($exit);
521
# Return 1 when $file matches the MAINTAINERS pattern $pattern, else 0.
#
# $pattern is used as a regular expression anchored at the start of $file.
# A pattern ending in "/" (a directory) matches everything below it; any
# other pattern matches only when $file contains exactly as many "/"
# characters as the pattern, i.e. it does not descend into subdirectories.
sub file_match_pattern {
    my ($file, $pattern) = @_;

    return 0 if ($file !~ m@^$pattern@);
    return 1 if (substr($pattern, -1) eq "/");

    # tr with an empty replacement list only counts, it does not modify.
    my $file_slashes = ($file =~ tr@/@@);
    my $pattern_slashes = ($pattern =~ tr@/@@);

    return ($file_slashes == $pattern_slashes) ? 1 : 0;
}
539
540sub usage {
541 print <<EOT;
542usage: $P [options] patchfile
870020f9 543 $P [options] -f file|directory
cb7301c7
JP
544version: $V
545
546MAINTAINER field selection options:
547 --email => print email address(es) if any
548 --git => include recent git \*-by: signers
e4d26b02
JP
549 --git-all-signature-types => include signers regardless of signature type
550 or use only ${signaturePattern} signers (default: $email_git_all_signature_types)
e3e9d114 551 --git-fallback => use git when no exact MAINTAINERS pattern (default: $email_git_fallback)
cb7301c7 552 --git-chief-penguins => include ${penguin_chiefs}
e4d26b02
JP
553 --git-min-signatures => number of signatures required (default: $email_git_min_signatures)
554 --git-max-maintainers => maximum maintainers to add (default: $email_git_max_maintainers)
555 --git-min-percent => minimum percentage of commits required (default: $email_git_min_percent)
f5492666 556 --git-blame => use git blame to find modified commits for patch or file
e4d26b02
JP
557 --git-since => git history to use (default: $email_git_since)
558 --hg-since => hg history to use (default: $email_hg_since)
cb7301c7
JP
559 --m => include maintainer(s) if any
560 --n => include name 'Full Name <addr\@domain.tld>'
561 --l => include list(s) if any
562 --s => include subscriber only list(s) if any
11ecf53c 563 --remove-duplicates => minimize duplicate email names/addresses
3c7385b8
JP
564 --roles => show roles (status:subsystem, git-signer, list, etc...)
565 --rolestats => show roles and statistics (commits/total_commits, %)
03372dbb 566 --file-emails => add email addresses found in -f file (default: 0 (off))
cb7301c7
JP
567 --scm => print SCM tree(s) if any
568 --status => print status if any
569 --subsystem => print subsystem name if any
570 --web => print website(s) if any
571
572Output type options:
573 --separator [, ] => separator for multiple entries on 1 line
42498316 574 using --separator also sets --nomultiline if --separator is not [, ]
cb7301c7
JP
575 --multiline => print 1 entry per line
576
cb7301c7 577Other options:
3fb55652 578 --pattern-depth => Number of pattern directory traversals (default: 0 (all))
dcf36a92 579 --keywords => scan patch for keywords (default: 1 (on))
4b76c9da 580 --sections => print the entire subsystem sections with pattern matches
f5f5078d 581 --version => show version
cb7301c7
JP
582 --help => show this help information
583
3fb55652 584Default options:
11ecf53c 585 [--email --git --m --n --l --multiline --pattern-depth=0 --remove-duplicates]
3fb55652 586
870020f9
JP
587Notes:
588 Using "-f directory" may give unexpected results:
f5492666
JP
589 Used with "--git", git signators for _all_ files in and below
590 directory are examined as git recurses directories.
591 Any specified X: (exclude) pattern matches are _not_ ignored.
592 Used with "--nogit", directory is used as a pattern match,
60db31ac
JP
593 no individual file within the directory or subdirectory
594 is matched.
f5492666
JP
595 Used with "--git-blame", does not iterate all files in directory
596 Using "--git-blame" is slow and may add old committers and authors
597 that are no longer active maintainers to the output.
3c7385b8
JP
598 Using "--roles" or "--rolestats" with git send-email --cc-cmd or any
599 other automated tools that expect only ["name"] <email address>
600 may not work because of additional output after <email address>.
601 Using "--rolestats" and "--git-blame" shows the #/total=% commits,
602 not the percentage of the entire file authored. # of commits is
603 not a good measure of amount of code authored. 1 major commit may
604 contain a thousand lines, 5 trivial commits may modify a single line.
60db31ac
JP
605 If git is not installed, but mercurial (hg) is installed and an .hg
606 repository exists, the following options apply to mercurial:
607 --git,
608 --git-min-signatures, --git-max-maintainers, --git-min-percent, and
609 --git-blame
610 Use --hg-since not --git-since to control date selection
368669da
JP
611 File ".get_maintainer.conf", if it exists in the linux kernel source root
612 directory, can change whatever get_maintainer defaults are desired.
613 Entries in this file can be any command line argument.
614 This file is prepended to any additional command line arguments.
615 Multiple lines and # comments are allowed.
cb7301c7
JP
616EOT
617}
618
# Return 1 when $lk_path looks like the top of a linux kernel source tree,
# i.e. it contains all of the well-known top level files and directories;
# return 0 otherwise.  A missing trailing "/" on a non-empty path is added.
sub top_of_kernel_tree {
    my ($lk_path) = @_;

    if ($lk_path ne "" && substr($lk_path, -1) ne "/") {
        $lk_path .= "/";
    }

    foreach my $file (qw(COPYING CREDITS Kbuild MAINTAINERS Makefile README)) {
        return 0 if (!(-f "${lk_path}${file}"));
    }
    foreach my $dir (qw(Documentation arch include drivers fs
                        init ipc kernel lib scripts)) {
        return 0 if (!(-d "${lk_path}${dir}"));
    }

    return 1;
}
645
0e70e83d
JP
# Split an email specification into (name, address).
#
# Accepted forms are "Name <addr@host>", "<addr@host>" and a bare
# "addr@host"; text following the address is ignored.  Both results are
# whitespace-trimmed.  A name containing anything other than word
# characters, spaces and '-' is returned wrapped in double quotes, with
# embedded unescaped quotes backslash-escaped.  Either element is "" when it
# could not be found.
sub parse_email {
    my ($formatted_email) = @_;

    my $name = "";
    my $address = "";

    if ($formatted_email =~ /^([^<]+)<(.+\@.*)>.*$/) {
        $name = $1;
        $address = $2;
    } elsif ($formatted_email =~ /^\s*<(.+\@\S*)>.*$/) {
        $address = $1;
    } elsif ($formatted_email =~ /^(.+\@\S*).*$/) {
        $address = $1;
    }

    $name =~ s/^\s+|\s+$//g;
    $name =~ s/^\"|\"$//g;          # drop one surrounding quote per side
    $address =~ s/^\s+|\s+$//g;

    if ($name =~ /[^\w \-]/) {          ##has "must quote" chars
        $name =~ s/(?<!\\)"/\\"/g;      ##escape quotes
        $name = "\"$name\"";
    }

    return ($name, $address);
}
672
# Build the printable form of an email address.
#
# $name and $address are whitespace-trimmed first; a name containing
# anything other than word characters, spaces and '-' is double-quoted.
# Returns "name <address>" when $usename is true and the name is not empty,
# otherwise just the address.
sub format_email {
    my ($name, $address, $usename) = @_;

    $name =~ s/^\s+|\s+$//g;
    $name =~ s/^\"|\"$//g;          # drop one surrounding quote per side
    $address =~ s/^\s+|\s+$//g;

    if ($name =~ /[^\w \-]/) {          ##has "must quote" chars
        $name =~ s/(?<!\\)"/\\"/g;      ##escape quotes
        $name = "\"$name\"";
    }

    if ($usename && $name ne "") {
        return "$name <$address>";
    }
    return $address;
}
699
272a8979
JP
# Return the index of the first @typevalue entry of the form "X:value",
# or scalar(@typevalue) when there is none.
sub find_first_section {
    my $index = 0;

    while ($index < @typevalue) {
        # '.' replaces '\C', which perl 5.24 and later reject.
        last if ($typevalue[$index] =~ m/^.:/);
        $index++;
    }

    return $index;
}
713
# Walk backwards from $index over "X:value" entries of @typevalue and return
# the index of the first entry that is not of that form (the line heading
# the section), or 0 when the start of the array is reached.
sub find_starting_index {
    my ($index) = @_;

    while ($index > 0) {
        # '.' replaces '\C', which perl 5.24 and later reject.
        last if ($typevalue[$index] !~ m/^.:/);
        $index--;
    }

    return $index;
}
727
# Walk forwards from $index over "X:value" entries of @typevalue and return
# the index of the first entry that is not of that form, or
# scalar(@typevalue) when the end of the array is reached.
sub find_ending_index {
    my ($index) = @_;

    while ($index < @typevalue) {
        # '.' replaces '\C', which perl 5.24 and later reject.
        last if ($typevalue[$index] !~ m/^.:/);
        $index++;
    }

    return $index;
}
741
3c7385b8
JP
# Return "role:subsystem" for the MAINTAINERS section containing $index.
#
# The subsystem is the section's heading line, shortened to 17 characters
# plus "..." when longer than 20.  The role is derived from the section's
# last S: (status) entry; sections without one yield the role "unknown"
# instead of operating on an undefined value.
sub get_maintainer_role {
    my ($index) = @_;

    my $start = find_starting_index($index);
    my $end = find_ending_index($index);

    my $role = "unknown";
    my $subsystem = $typevalue[$start];
    if (length($subsystem) > 20) {
        $subsystem = substr($subsystem, 0, 17);
        $subsystem =~ s/\s*$//;
        $subsystem = $subsystem . "...";
    }

    for (my $i = $start + 1; $i < $end; $i++) {
        # '.' replaces '\C', which perl 5.24 and later reject.
        if ($typevalue[$i] =~ m/^(.):\s*(.*)/) {
            $role = $2 if ($1 eq "S");
        }
    }

    # Status values with a dedicated person-style role name; any other
    # status is used as is (lower-cased).
    my %role_for_status = (
        "supported"                 => "supporter",
        "maintained"                => "maintainer",
        "odd fixes"                 => "odd fixer",
        "orphan"                    => "orphan minder",
        "obsolete"                  => "obsolete minder",
        "buried alive in reporters" => "chief penguin",
    );

    $role = lc($role);
    $role = $role_for_status{$role} if (exists($role_for_status{$role}));

    return $role . ":" . $subsystem;
}
785
# Return the subsystem name used as the role suffix for a mailing list: the
# heading line of the section containing $index, shortened to 17 characters
# plus "..." when longer than 20.  The catch-all section "THE REST" yields
# the empty string.
sub get_list_role {
    my ($index) = @_;

    my $subsystem = $typevalue[find_starting_index($index)];
    if (length($subsystem) > 20) {
        $subsystem = substr($subsystem, 0, 17);
        $subsystem =~ s/\s*$//;
        $subsystem = $subsystem . "...";
    }

    return "" if ($subsystem eq "THE REST");
    return $subsystem;
}
806
b781655a
JP
# Collect the information of the MAINTAINERS section containing $index:
# the section heading goes to @subsystem, L: entries to @list_to (honouring
# the list / subscriber-list options), M: entries to the email list via
# push_email_addresses(), and T:, W:, S: values to @scm, @web and @status.
sub add_categories {
    my ($index) = @_;

    my $start = find_starting_index($index);
    my $end = find_ending_index($index);

    push(@subsystem, $typevalue[$start]);

    for (my $i = $start + 1; $i < $end; $i++) {
        my $tv = $typevalue[$i];
        # '.' replaces '\C', which perl 5.24 and later reject.
        next if ($tv !~ m/^(.):\s*(.*)/);

        my $ptype = $1;
        my $pvalue = $2;
        if ($ptype eq "L") {
            my $list_address = $pvalue;
            my $list_additional = "";
            my $list_role = get_list_role($i);

            if ($list_role ne "") {
                $list_role = ":" . $list_role;
            }
            # Anything after the address is a remark such as
            # "(subscribers-only)".
            if ($list_address =~ m/([^\s]+)\s+(.*)$/) {
                $list_address = $1;
                $list_additional = $2;
            }
            if ($list_additional =~ m/subscribers-only/) {
                if ($email_subscriber_list) {
                    push(@list_to, [$list_address, "subscriber list${list_role}"]);
                }
            } else {
                if ($email_list) {
                    push(@list_to, [$list_address, "open list${list_role}"]);
                }
            }
        } elsif ($ptype eq "M") {
            my ($name, $address) = parse_email($pvalue);
            # An M: entry without a name takes it from a directly
            # preceding P: entry.
            if ($name eq "" && $i > 0) {
                my $prev_tv = $typevalue[$i - 1];
                if ($prev_tv =~ m/^(.):\s*(.*)/) {
                    if ($1 eq "P") {
                        $name = $2;
                        $pvalue = format_email($name, $address, $email_usename);
                    }
                }
            }
            if ($email_maintainer) {
                my $role = get_maintainer_role($i);
                push_email_addresses($pvalue, $role);
            }
        } elsif ($ptype eq "T") {
            push(@scm, $pvalue);
        } elsif ($ptype eq "W") {
            push(@web, $pvalue);
        } elsif ($ptype eq "S") {
            push(@status, $pvalue);
        }
    }
}
869
11ecf53c
JP
870my %email_hash_name;
871my %email_hash_address;
0e70e83d 872
11ecf53c
JP
873sub email_inuse {
874 my ($name, $address) = @_;
875
876 return 1 if (($name eq "") && ($address eq ""));
877 return 1 if (($name ne "") && exists($email_hash_name{$name}));
878 return 1 if (($address ne "") && exists($email_hash_address{$address}));
0e70e83d 879
0e70e83d
JP
880 return 0;
881}
882
# Parse $line as a single email specification and append
# [formatted address, $role] to @email_to.  With duplicate removal enabled
# the entry is only added when neither its name nor its address is already
# in use, and both are then recorded as used.
# Returns 0 when no address could be parsed from $line, otherwise 1
# (even when the entry was dropped as a duplicate).
sub push_email_address {
    my ($line, $role) = @_;

    my ($name, $address) = parse_email($line);

    return 0 if ($address eq "");

    if (!$email_remove_duplicates || !email_inuse($name, $address)) {
        push(@email_to, [format_email($name, $address, $email_usename), $role]);
        if ($email_remove_duplicates) {
            $email_hash_name{$name}++;
            $email_hash_address{$address}++;
        }
    }

    return 1;
}
902
# Add one or more addresses with the given role.  $address is tried as a
# single rfc822 address first, then as an rfc822 address list; when neither
# validates it is handed to push_email_address() as a last resort and a
# warning is issued if that finds no address either.
sub push_email_addresses {
    my ($address, $role) = @_;

    my @address_list = ();

    if (rfc822_valid($address)) {
        push_email_address($address, $role);
    } elsif (@address_list = rfc822_validlist($address)) {
        # The first element is not an address; the original code stored it
        # in $array_count, so it is presumably the number of addresses.
        shift(@address_list);
        # Iterate over every entry; the former "while (my $entry = shift)"
        # silently stopped at the first false entry.
        foreach my $entry (@address_list) {
            push_email_address($entry, $role);
        }
    } else {
        if (!push_email_address($address, $role)) {
            warn("Invalid MAINTAINERS address: '" . $address . "'\n");
        }
    }
}
921
3c7385b8
JP
# Append $role to the role string of every @email_to entry that refers to
# the person given in $line, unless that entry already mentions the role.
#
# With duplicate removal enabled an entry matches when either its name or
# its address equals the parsed one; otherwise the whole formatted email
# must be equal.
sub add_role {
    my ($line, $role) = @_;

    my ($name, $address) = parse_email($line);
    my $email = format_email($name, $address, $email_usename);

    foreach my $entry (@email_to) {
        my $same_person;
        if ($email_remove_duplicates) {
            my ($entry_name, $entry_address) = parse_email($entry->[0]);
            $same_person = ($name eq $entry_name || $address eq $entry_address);
        } else {
            $same_person = ($email eq $entry->[0]);
        }
        next if (!$same_person);

        # Roles are plain text that may contain regex metacharacters
        # ("(", "[", ".", "%", "/"), so look for them literally.  An empty
        # role is never looked up: an empty pattern would re-use the last
        # successful regex.
        next if ($role ne "" && $entry->[1] =~ m/\Q$role\E/);

        if ($entry->[1] eq "") {
            $entry->[1] = "$role";
        } else {
            $entry->[1] = "$entry->[1],$role";
        }
    }
}
953
cb7301c7
JP
# Return the path of the first entry named $bin found in the directories of
# $ENV{PATH}, or "" when there is none (or PATH is not set).
sub which {
    my ($bin) = @_;

    # Avoid splitting an undefined value when PATH is absent.
    my $search_path = defined($ENV{PATH}) ? $ENV{PATH} : "";

    foreach my $path (split(/:/, $search_path)) {
        if (-e "$path/$bin") {
            return "$path/$bin";
        }
    }

    return "";
}
965
bcde44ed
JP
# Return the path of the first existing file named $conf, looking in the
# current directory, then $HOME (when set), then ./scripts; return "" when
# none of them contains it.
sub which_conf {
    my ($conf) = @_;

    # Kept as a list rather than a ':'-joined string so that an unset HOME
    # or a HOME containing ':' cannot corrupt the search path.  The last
    # entry used to be ".scripts", which is not the scripts directory.
    my @search_dirs = (".");
    if (defined($ENV{HOME}) && $ENV{HOME} ne "") {
        push(@search_dirs, $ENV{HOME});
    }
    push(@search_dirs, "./scripts");

    foreach my $path (@search_dirs) {
        if (-e "$path/$conf") {
            return "$path/$conf";
        }
    }

    return "";
}
977
# Normalise a list of email lines so that every occurrence of a name uses
# the address first seen for that name within the list.  Lines are rewritten
# in place (the loop variable aliases the elements of @lines) and the
# resulting list is returned.
sub mailmap {
    my (@lines) = @_;
    my %hash;       # name => first address seen for it

    foreach my $line (@lines) {
        my ($name, $address) = parse_email($line);
        if (!exists($hash{$name})) {
            $hash{$name} = $address;
        } elsif ($address ne $hash{$name}) {
            $address = $hash{$name};
            $line = format_email($name, $address, $email_usename);
        }
        if (exists($mailmap{$name})) {
            my $obj = $mailmap{$name};
            foreach my $map_address (@$obj) {
                # NOTE(review): at this point $address always equals
                # $hash{$name}, so this condition looks unreachable; it is
                # kept unchanged - confirm the intended .mailmap handling.
                if (($map_address eq $address) &&
                    ($map_address ne $hash{$name})) {
                    $line = format_email($name, $hash{$name}, $email_usename);
                }
            }
        }
    }

    return @lines;
}
1003
60db31ac
JP
# Run $cmd through the shell and return its output as a list of lines with
# leading whitespace removed from every line.  Returns an empty list when
# the command produced no output or could not be run.
sub git_execute_cmd {
    my ($cmd) = @_;

    my $output = `$cmd`;
    # Backticks yield undef when the command cannot be started.
    return () if (!defined($output));

    $output =~ s/^\s*//gm;

    return split(/\n/, $output);
}
1014
# Run $cmd through the shell and return its output as a list of lines,
# unmodified apart from the removed newlines.  Returns an empty list when
# the command produced no output or could not be run.
sub hg_execute_cmd {
    my ($cmd) = @_;

    my $output = `$cmd`;
    # Backticks yield undef when the command cannot be started.
    return () if (!defined($output));

    return split(/\n/, $output);
}
1024
# Run the VCS log command $cmd and extract the people who signed the
# commits it lists.
#
# Returns a list whose first element is the number of commits seen (lines
# matching the VCS "commit_pattern"), followed by one "name <address>"
# string per signature line.  When no signature line remains the result is
# just (0).  Chief penguins are filtered out unless explicitly requested.
sub vcs_find_signers {
    my ($cmd) = @_;

    my @lines = $VCS_cmds{"execute_cmd"}->($cmd);

    my $pattern = $VCS_cmds{"commit_pattern"};
    my $commits = grep(/$pattern/, @lines);     # of commits

    @lines = grep(/^[ \t]*${signaturePattern}.*\@.*$/, @lines);
    if (!$email_git_penguin_chiefs) {
        @lines = grep(!/${penguin_chiefs}/i, @lines);
    }

    return (0, @lines) if !@lines;

    # cut -f2- -d":"
    s/.*:\s*(.+)\s*/$1/ for (@lines);

    ## Reformat email addresses (with names) to avoid badly written signatures
    foreach my $line (@lines) {
        my ($name, $address) = parse_email($line);
        $line = format_email($name, $address, 1);
    }

    return ($commits, @lines);
}
1055
63ab52db
JP
# Run the VCS command $cmd, which prints commit authors, and return its
# output lines reformatted as "name <address>".  Chief penguins are
# filtered out unless explicitly requested; the result may be empty.
sub vcs_find_author {
    my ($cmd) = @_;

    my @lines = $VCS_cmds{"execute_cmd"}->($cmd);

    if (!$email_git_penguin_chiefs) {
        @lines = grep(!/${penguin_chiefs}/i, @lines);
    }

    return @lines if !@lines;

    ## Reformat email addresses (with names) to avoid badly written signatures
    foreach my $line (@lines) {
        my ($name, $address) = parse_email($line);
        $line = format_email($name, $address, 1);
    }

    return @lines;
}
1077
60db31ac
JP
# Run a blame command and return, in output order, the first capture group
# of $VCS_cmds{"blame_commit_pattern"} for every output line that matches
# it.  Non-matching lines are ignored.
sub vcs_save_commits {
    my ($cmd) = @_;
    my @commits = ();

    foreach my $blame_line ($VCS_cmds{"execute_cmd"}->($cmd)) {
	next unless ($blame_line =~ m/$VCS_cmds{"blame_commit_pattern"}/);
	push(@commits, $1);
    }

    return @commits;
}
1093
# Return the list of commit ids that blame reports for $file.
#
# Three cases, depending on the file-level @range list (entries of the form
# "file:start:length", as matched below) and on whether the active VCS
# provides a ranged blame command:
#   - @range set, no "blame_range_cmd": blame the whole file once, then pick
#     the entries belonging to each range for this file;
#   - @range set, "blame_range_cmd" available: run one ranged blame per
#     range entry for this file;
#   - no @range: blame the whole file.
# A leading "^" on a commit id is stripped before returning.  Returns an
# empty list when $file is not a plain file.
#
# The command templates are expanded with s/(\$\w+)/$1/eeg, which evaluates
# variable names found in the template in this scope, so the lexicals
# $file, $diff_start and $diff_length must keep these names.
sub vcs_blame {
    my ($file) = @_;
    my $cmd;
    my @commits = ();

    return @commits if (!(-f $file));

    if (@range && $VCS_cmds{"blame_range_cmd"} eq "") {
	my @all_commits = ();

	$cmd = $VCS_cmds{"blame_file_cmd"};
	$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
	@all_commits = vcs_save_commits($cmd);

	foreach my $file_range_diff (@range) {
	    next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
	    my $diff_file = $1;
	    my $diff_start = $2;
	    my $diff_length = $3;
	    next if ("$file" ne "$diff_file");
	    # NOTE(review): $diff_start is used directly as a 0-based index
	    # into @all_commits; if @range carries 1-based line numbers this
	    # is off by one - confirm against the code that fills @range.
	    for (my $i = $diff_start; $i < $diff_start + $diff_length; $i++) {
		# A range reaching outside the blame output must not inject
		# undef (or wrap around via a negative index): an undefined
		# commit would later be interpolated into VCS commands as an
		# empty revision.
		next if ($i < 0 || $i > $#all_commits);
		push(@commits, $all_commits[$i]);
	    }
	}
    } elsif (@range) {
	foreach my $file_range_diff (@range) {
	    next if (!($file_range_diff =~ m/(.+):(.+):(.+)/));
	    my $diff_file = $1;
	    my $diff_start = $2;
	    my $diff_length = $3;
	    next if ("$file" ne "$diff_file");
	    $cmd = $VCS_cmds{"blame_range_cmd"};
	    $cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
	    push(@commits, vcs_save_commits($cmd));
	}
    } else {
	$cmd = $VCS_cmds{"blame_file_cmd"};
	$cmd =~ s/(\$\w+)/$1/eeg;		#interpolate $cmd
	@commits = vcs_save_commits($cmd);
    }

    # Strip the "^" prefix some blame output puts in front of a commit id
    foreach my $commit (@commits) {
	$commit =~ s/^\^//g;
    }

    return @commits;
}
1141
# Set once the "no VCS" advice has been printed, so it is shown only once.
my $printed_novcs = 0;

# Select the VCS command table to use.  The git table is tried first, then
# the hg table; the first one whose "available" expression evaluates true
# is left installed in %VCS_cmds and 1 is returned.  Otherwise %VCS_cmds is
# emptied, a one-time warning is printed, and 0 is returned.
sub vcs_exists {
    foreach my $candidate (\%VCS_cmds_git, \%VCS_cmds_hg) {
	%VCS_cmds = %{$candidate};
	return 1 if eval $VCS_cmds{"available"};
    }

    %VCS_cmds = ();
    return 0 if ($printed_novcs);

    warn("$P: No supported VCS found.  Add --nogit to options?\n");
    warn("Using a git repository produces better results.\n");
    warn("Try Linus Torvalds' latest git repository using:\n");
    warn("git clone git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git\n");
    $printed_novcs = 1;

    return 0;
}
1158
# Credit the most frequent entries of @lines with $role.
#
#   $role    - role text passed to add_role()
#   $divisor - total used to turn each count into a percentage; a value
#              <= 0 is reported with a warning and replaced by 1
#   @lines   - formatted signer lines, one per occurrence
#
# Entries are counted, visited from most to least frequent, and recorded
# with push_email_address()/add_role() until one of the limits
# ($email_git_min_signatures, $email_git_max_maintainers,
# $email_git_min_percent) is hit.  Returns nothing useful.
sub vcs_assign {
    my ($role, $divisor, @lines) = @_;

    my %hash;
    my $count = 0;

    return if (@lines <= 0);

    if ($divisor <= 0) {
	warn("Bad divisor in " . (caller(0))[3] . ": $divisor\n");
	$divisor = 1;
    }

    if ($email_remove_duplicates) {
	@lines = mailmap(@lines);
    }

    return if (@lines <= 0);

    # uniq -c
    $hash{$_}++ for @lines;

    # sort -rn, with ties broken by the line text.  Sorting on the count
    # alone leaves equal counts in hash order, which varies between runs,
    # so which of several tied signers survived the cut-offs below was not
    # reproducible.
    foreach my $line (sort { $hash{$b} <=> $hash{$a} || $a cmp $b } keys %hash) {
	my $sign_offs = $hash{$line};
	my $percent = $sign_offs * 100 / $divisor;

	$percent = 100 if ($percent > 100);
	$count++;
	last if ($sign_offs < $email_git_min_signatures ||
		 $count > $email_git_max_maintainers ||
		 $percent < $email_git_min_percent);
	push_email_address($line, '');
	if ($output_rolestats) {
	    my $fmt_percent = sprintf("%.0f", $percent);
	    add_role($line, "$role:$sign_offs/$divisor=$fmt_percent%");
	} else {
	    add_role($line, $role);
	}
    }
}
1202
# Credit the people who signed commits touching $file with the
# "commit_signer" role.  Does nothing when no supported VCS is found.
sub vcs_file_signoffs {
    my ($file) = @_;

    return unless (vcs_exists());

    # The template is expanded by evaluating the variable names it contains
    # in this scope (presumably $file - the templates are defined
    # elsewhere), so the parameter must keep its name.
    my $cmd = $VCS_cmds{"find_signers_cmd"};
    $cmd =~ s/(\$\w+)/$1/eeg;		# interpolate $cmd

    my ($commits, @signers) = vcs_find_signers($cmd);
    vcs_assign("commit_signer", $commits, @signers);
}
1217
# Credit people based on the blame information for $file.
#
# The signers of every distinct commit reported by vcs_blame() are
# collected.  With $from_filename set they are credited with the "commits"
# role, and, when $output_rolestats is also set, each commit's author is
# additionally credited with "authored lines" in proportion to the number
# of blame entries belonging to that commit.  Without $from_filename the
# signers get the "modified commits" role.
# Does nothing when no supported VCS is found.
#
# The command templates are expanded with s/(\$\w+)/$1/eeg, which evaluates
# variable names found in the template in this scope, so $file and the
# loop variable $commit must keep these names.
sub vcs_file_blame {
    my ($file) = @_;

    my @signers = ();
    my @all_commits = ();
    my @commits = ();
    my $total_commits;
    my $total_lines;

    return if (!vcs_exists());

    @all_commits = vcs_blame($file);
    @commits = uniq(@all_commits);
    $total_commits = @commits;
    $total_lines = @all_commits;

    foreach my $commit (@commits) {
	my $commit_count;
	my @commit_signers = ();

	my $cmd = $VCS_cmds{"find_commit_signers_cmd"};
	$cmd =~ s/(\$\w+)/$1/eeg;	#interpolate $cmd

	($commit_count, @commit_signers) = vcs_find_signers($cmd);

	push(@signers, @commit_signers);
    }

    if ($from_filename) {
	if ($output_rolestats) {
	    # Count blame entries per commit once, by exact id.  Matching
	    # the id as an unanchored regex (grep(/$commit/, ...)) also
	    # counted entries whose id merely contains it - e.g. a short
	    # revision "12" matching "120" - and rescanned the whole list
	    # for every commit.
	    my %lines_of_commit;
	    $lines_of_commit{$_}++ for (grep { defined($_) } @all_commits);

	    my @blame_signers;
	    foreach my $commit (@commits) {
		my $cmd = $VCS_cmds{"find_commit_author_cmd"};
		$cmd =~ s/(\$\w+)/$1/eeg;	#interpolate $cmd
		my @author = vcs_find_author($cmd);
		next if !@author;
		my $count = defined($commit) ? ($lines_of_commit{$commit} || 0) : 0;
		# one entry per blamed line, all credited to the first author
		push(@blame_signers, ($author[0]) x $count);
	    }
	    if (@blame_signers) {
		vcs_assign("authored lines", $total_lines, @blame_signers);
	    }
	}
	vcs_assign("commits", $total_commits, @signers);
    } else {
	vcs_assign("modified commits", $total_commits, @signers);
    }
}
1269
# Return the arguments with duplicates removed, keeping the first
# occurrence of each value and the original order.
sub uniq {
    my (@parms) = @_;

    my %seen;
    my @firsts;
    foreach my $item (@parms) {
	next if ($seen{$item}++);
	push(@firsts, $item);
    }
    return @firsts;
}
1277
# Return the arguments sorted with the default string sort and with
# duplicates removed.
sub sort_and_uniq {
    my (@parms) = @_;

    my %seen;
    my @result;
    foreach my $item (sort @parms) {
	push(@result, $item) unless ($seen{$item}++);
    }
    return @result;
}
1286
03372dbb
JP
# Tidy up email addresses found inside a source file and return them
# re-rendered with format_email().
#
# For each input string: the address is wrapped in <>, the string is split
# into name and address by parse_email(), the name is cut down to its last
# two words (or last three when the first or middle of those looks like an
# initial), and one stray comma/period at either end of the name is
# removed.
sub clean_file_emails {
    my (@file_emails) = @_;
    my @fmt_emails = ();

    foreach my $raw_email (@file_emails) {
	# Normalise "(addr)", "{addr}", "<addr>" and bare "addr" to "<addr>"
	$raw_email =~ s/[\(\<\{]{0,1}([A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+)[\)\>\}]{0,1}/\<$1\>/g;
	my ($name, $address) = parse_email($raw_email);
	$name = "" if ($name eq '"[,\.]"');

	my @words = split(/[^A-Za-zÀ-ÿ\'\,\.\+-]/, $name);
	if (@words > 2) {
	    my ($first, $middle, $last) = @words[-3 .. -1];

	    # An "initial" is a single character (a letter, for $first) or
	    # a character followed by a period.
	    my $first_is_initial =
		(length($first) == 1 && $first =~ m/[A-Za-z]/) ||
		(length($first) == 2 && substr($first, -1) eq ".");
	    my $middle_is_initial =
		length($middle) == 1 ||
		(length($middle) == 2 && substr($middle, -1) eq ".");

	    $name = ($first_is_initial || $middle_is_initial)
		? "$first $middle $last"
		: "$middle $last";
	}

	# Drop one trailing comma/period, at the very end or just inside a
	# closing quote
	if ($name =~ m/[,\.]\z/) {
	    chop($name);
	} elsif ($name =~ m/[,\.]"\z/) {
	    substr($name, -2) = '"';
	}

	# Same for one leading comma/period, at the very start or just
	# inside an opening quote
	if ($name =~ m/\A[,\.]/) {
	    substr($name, 0, 1) = "";
	} elsif ($name =~ m/\A"[,\.]/) {
	    substr($name, 1, 1) = "";
	}

	push(@fmt_emails, format_email($name, $address, $email_usename));
    }
    return @fmt_emails;
}
1331
3c7385b8
JP
# Turn a list of [address, role] pairs into output lines, keeping only the
# first pair seen for each address.  With $output_roles set each line is
# "address (role)", otherwise just the address.
sub merge_email {
    my @lines;
    my %saw;

    foreach my $entry (@_) {
	my ($address, $role) = @{$entry};
	next if ($saw{$address});

	push(@lines, $output_roles ? "$address ($role)" : $address);
	$saw{$address} = 1;
    }

    return @lines;
}
1350
# Print the given items to standard output: joined by $output_separator on
# a single line, or one per line when $output_multiline is set.
sub output {
    my (@parms) = @_;

    if (!$output_multiline) {
	print(join($output_separator, @parms));
	print("\n");
    } else {
	print("$_\n") foreach (@parms);
    }
}
1b5e1cf6
JP
1363
# Cache for the regular expression source built by make_rfc822re();
# rfc822_valid() and rfc822_validlist() fill it in on first use.
1364my $rfc822re;
1365
# make_rfc822re: assemble and return, as a string, the regular expression
# source that matches one address (a mailbox or a group), built bottom-up
# from the grammar pieces below.  Takes no arguments.  Relies on
# $rfc822_lwsp, which is defined outside this function.  The returned
# pattern carries no ^/$ anchors; the callers add them.
1366sub make_rfc822re {
1367# Basic lexical tokens are specials, domain_literal, quoted_string, atom, and
1368# comment. We must allow for rfc822_lwsp (or comments) after each of these.
1369# This regexp will only work on addresses which have had comments stripped
1370# and replaced with rfc822_lwsp.
1371
# $specials and $controls are character-class contents, not complete
# patterns: they are only ever interpolated inside [...] in $atom below.
1372 my $specials = '()<>@,;:\\\\".\\[\\]';
1373 my $controls = '\\000-\\037\\177';
1374
1375 my $dtext = "[^\\[\\]\\r\\\\]";
1376 my $domain_literal = "\\[(?:$dtext|\\\\.)*\\]$rfc822_lwsp*";
1377
1378 my $quoted_string = "\"(?:[^\\\"\\r\\\\]|\\\\.|$rfc822_lwsp)*\"$rfc822_lwsp*";
1379
1380# Use zero-width assertion to spot the limit of an atom. A simple
1381# $rfc822_lwsp* causes the regexp engine to hang occasionally.
1382 my $atom = "[^$specials $controls]+(?:$rfc822_lwsp+|\\Z|(?=[\\[\"$specials]))";
1383 my $word = "(?:$atom|$quoted_string)";
1384 my $localpart = "$word(?:\\.$rfc822_lwsp*$word)*";
1385
1386 my $sub_domain = "(?:$atom|$domain_literal)";
1387 my $domain = "$sub_domain(?:\\.$rfc822_lwsp*$sub_domain)*";
1388
1389 my $addr_spec = "$localpart\@$rfc822_lwsp*$domain";
1390
# A mailbox is either a bare addr_spec or an optional phrase followed by
# a <...> route_addr.
1391 my $phrase = "$word*";
1392 my $route = "(?:\@$domain(?:,\@$rfc822_lwsp*$domain)*:$rfc822_lwsp*)";
1393 my $route_addr = "\\<$rfc822_lwsp*$route?$addr_spec\\>$rfc822_lwsp*";
1394 my $mailbox = "(?:$addr_spec|$phrase$route_addr)";
1395
# A group is "phrase: mailbox, mailbox, ...;" and its mailbox list may be
# empty.
1396 my $group = "$phrase:$rfc822_lwsp*(?:$mailbox(?:,\\s*$mailbox)*)?;\\s*";
1397 my $address = "(?:$mailbox|$group)";
1398
1399 return "$rfc822_lwsp*$address";
1400}
1401
# Return the argument with every parenthesised comment replaced by a single
# space.  Parentheses inside double-quoted strings are left alone, and
# backslash-escaped characters are skipped over.  Nested comments are
# handled by removing the innermost comment on each pass until none is
# left.  The simpler regexps in the Email Addressing FAQ are imperfect -
# they will miss escaped chars in atoms, for example.
sub rfc822_strip_comments {
    my ($text) = @_;

    # $1 is everything (including complete quoted strings) in front of a
    # comment that itself contains no unescaped parentheses.
    my $innermost_comment = qr/^((?:[^"\\]|\\.)*
				 (?:"(?:[^"\\]|\\.)*"(?:[^"\\]|\\.)*)*)
				 \((?:[^()\\]|\\.)*\)/sx;

    1 while ($text =~ s/$innermost_comment/$1 /);

    return $text;
}
1413
# rfc822_valid: returns true if the parameter, once its comments have been
# stripped, is a single RFC822 valid address made up only of characters
# accepted by $rfc822_char.
#
sub rfc822_valid {
    my ($candidate) = @_;
    my $s = rfc822_strip_comments($candidate);

    # Build the address regexp on first use and cache it
    $rfc822re ||= make_rfc822re();

    return $s =~ m/^$rfc822re$/so && $s =~ m/^$rfc822_char*$/;
}
1425
# rfc822_validlist: checks a comma separated list of RFC822 addresses.
#
# In scalar context, returns 1 if the parameter is a valid list and 0
# otherwise.
#
# In list context, returns an empty list on failure (an invalid address
# was found); otherwise a list whose first element is the number of
# addresses found and whose remaining elements are the addresses.  The
# leading count is what tells failure apart from success with no
# addresses, because an empty string is a valid list.

sub rfc822_validlist {
    my ($list) = @_;
    my $s = rfc822_strip_comments($list);

    # Build the address regexp on first use and cache it
    $rfc822re ||= make_rfc822re();

    # * null list items are valid according to the RFC
    my $well_formed = ($s =~ m/^(?:$rfc822re)?(?:,(?:$rfc822re)?)*$/so &&
		       $s =~ m/^$rfc822_char*$/);
    if (!$well_formed) {
	return wantarray ? () : 0;
    }

    # Collect the individual addresses
    my @r;
    while ($s =~ m/(?:^|,$rfc822_lwsp*)($rfc822re)/gos) {
	push(@r, $1);
    }

    # * the leading count is to aid in distinguishing failure from no results
    return wantarray ? (scalar(@r), @r) : 1;
}