pkgsrc/textproc/mdoclint/files/mdoclint
2013-01-01 01:42:08 +00:00

625 lines
15 KiB
Text
Executable file

#!@PERL5@
#
# $OpenBSD: mdoclint,v 1.14 2009/04/13 12:40:05 espie Exp $
# $NetBSD: mdoclint,v 1.21 2013/01/01 01:42:08 jnemeth Exp $
#
# Copyright (c) 2001-2012 Thomas Klausner
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR, THOMAS KLAUSNER,
# ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
#
use strict;
use warnings;
$| = 1;
package Parser;
use Getopt::Std;
use constant {
OPENBSD => 0,
NETBSD => 1
};
use vars qw(
$opt_A $opt_a $opt_D $opt_d $opt_e $opt_F $opt_f $opt_H $opt_h $opt_m
$opt_n $opt_O $opt_o $opt_P $opt_p $opt_r $opt_S $opt_s $opt_v $opt_w
$opt_X $opt_x
);
my $arch=`uname -m`;
chomp($arch);
my $options="AaDdeFfHhmnOoPprSsvwXx";
sub usage
{
my $default = OPENBSD ? "-aDdfmnoPprSsXx" : "-aDdefmnoPprSsXx";
print STDERR <<"EOF";
mdoclint: verify man page correctness
usage: mdoclint [-$options] file ...
-A warn about missing .An in AUTHORS section
-a warn about SEE ALSO section problems
-D warn about bad casing and archs in .Dt
-d warn about bad date strings (in .Dd only)
-e warn about unsorted errors (for functions)
-F fix whitespace problems (asks before overwriting)
-f warn about possible incorrect .Fn syntax
-H warn about characters that produce problems in HTML output
-h display this help text
-m warn about man pages that are not in mdoc(7) format
-n warn about .Nd's ending in '.'
-O warn about unsorted .It arguments
-o warn about non-empty .Os strings
-P warn about paragraph problems
-p warn about punctuation problems
-r warn about missing RCS Id
-S warn about any .Sh weirdness
-s warn about whitespace problems
-v verbose output
-w show section header in warnings
-X warn about explicit mentions of FreeBSD, NetBSD, or OpenBSD
-x warn about cross-references with missing targets
Default is $default if no flag is specified.
EOF
exit(0);
}
my %short = (
"Free" => ".Fx",
"Net" => ".Nx",
"Open" => ".Ox"
);
# constants to build
my %sections;
my $arches_re;
my $sections_re;
my $esections_re;
my $valid_date_re;
# and the code that builds them
{
my @sections = (
"NAME",
NETBSD ? "LIBRARY" : undef,
"SYNOPSIS",
"DESCRIPTION",
NETBSD ? "IMPLEMENTATION NOTES" : undef,
"RETURN VALUES",
"ENVIRONMENT",
"FILES",
"EXIT STATUS",
"EXAMPLES",
"DIAGNOSTICS",
NETBSD ? "COMPATIBILITY" : undef,
"ERRORS",
"SEE ALSO",
"STANDARDS",
"HISTORY",
"AUTHORS",
"CAVEATS",
"BUGS",
NETBSD ? "SECURITY CONSIDERATIONS" : undef
);
my $i = 1;
for my $sh (@sections) {
if (defined $sh) {
$sections{$sh} = $i++;
}
}
my @arches;
if (OPENBSD) {
@arches =
(qw(alpha amd64 arm armish aviion beagle cats hp300 hppa
hppa64 i386 landisk loongson luna88k macppc mips64
mvme68k mvme88k sgi socppc sparc sparc64 vax zaurus));
}
if (NETBSD) {
@arches =
(qw(acorn26 acorn32 algor alpha amiga arc atari
bebox cats cesfic cobalt dreamcast evbarm evbmips evbppc
evbsh3 evbsh5 hp300 hp700 hpcarm hpcmips hpcsh
i386 ibmnws luna68k mac68k macppc mipsco mmeye
mvme68k mvmeppc netwinder news68k newsmips next68k
pc532 playstation2 pmax pmppc prep sandpoint sbmips
sgimips shark sparc sparc64 sun2 sun3 vax walnut
x68k x86 x86_64 xen));
}
my $a = join('|', @arches);
$arches_re = qr{(?:$a)}o;
if (OPENBSD) {
$sections_re = qr{(?:3p|[1-9])}o;
$esections_re = qr{(?:3p|[0-9])}o;
}
if (NETBSD) {
$sections_re = qr{[1-9]}o;
$esections_re = qr{[0-9]}o;
}
if (OPENBSD) {
$valid_date_re = qr{\$Mdocdate\b};
}
if (NETBSD) {
$valid_date_re = qr{(?:January|February|March|April|May|June|July|August|September|October|November|December)\s*[1-9][0-9]*,\s*(?:198[0-9]|199[0-9]|200[0-9]|201[0-3])$}o;
}
}
sub debug
{
my $self = shift;
print STDOUT "debug: $self->{fn}:$self->{ln}: @_\n" if $opt_v;
}
sub warning
{
my $self = shift;
my $extra = "";
if ($opt_w) {
$extra = $self->{current_section_header}.":";
}
print STDOUT "$self->{fn}:$extra$self->{ln}: ", join('', @_), "\n";
}
my $order = " !\"#\$\%&'()*+,-./:;<=>?[\\]^_{|}~".
"0123456789".
"AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz";
sub forder {
my ($a, $b, $c, $len, $i);
$a = $_[0];
$b = $_[1];
$len = (length($a) < length($b))? length($a) : length($b);
for($i = 0; $i<=$len; $i++) {
$c = (index($order,substr($a,$i,1)) <=> index($order,substr($b,$i,1)));
if($c){ return $c };
}
return (length($a) <=> length($b));
}
sub handle_options
{
getopts($options);
$opt_h and usage();
# default to all warnings if no flag is set
unless ($opt_A or $opt_a or $opt_D or $opt_d or $opt_e
or $opt_f or $opt_H or $opt_m or $opt_n or $opt_O
or $opt_o or $opt_P or $opt_p or $opt_r
or $opt_S or $opt_s or $opt_X or $opt_x) {
$opt_A = $opt_a = $opt_D = $opt_d = $opt_f = $opt_m =
$opt_n = $opt_O = $opt_o = $opt_P = $opt_p = $opt_r = $opt_S =
$opt_s = $opt_X = $opt_x = 1;
$opt_e = 1 if NETBSD;
}
}
sub verify_xref
{
my ($self, $page, $section, $pre, $post) = @_;
if ("$page.$section" eq $self->{fn}) {
$self->warning("Xref to itself (use .Nm instead)") if $opt_x;
}
# try to find corresponding man page
for my $dir ("/usr/share/man",
OPENBSD ? "/usr/X11R6/man" : "/usr/X11R7/man") {
for my $a ("", $arch) {
for my $page ("cat$section/$a/$page.0",
"man$section/$a/$page.$section") {
return 1 if -f "$dir/$page";
}
}
}
return 1 if -f "./$page.$section";
$self->warning($pre."trailing Xref to $page($section)$post") if $opt_x;
return 0;
}
sub new
{
my ($class, $fn) = @_;
my $o = {
mandoc_p => 1,
all => [],
lastline => '',
changes => 0,
oxrcsidseen => 0,
nxrcsidseen => 0,
lastsh => 0,
sasection => 0,
saname => '',
sarest => ',',
insa => 0,
inliteral => 0,
shseen => {},
last_error_name => '',
current_section_header => '',
fn => $fn
};
open my $input, '<', $fn or die "can't open input file $fn";
$o->{file} = $input;
$o->{ln} = 0;
bless $o, $class;
}
sub next_line
{
my ($self) = @_;
my $l = readline($self->{file});
if (defined $l) {
$self->{ln}++;
}
return $l;
}
sub close
{
my ($self) = @_;
close($self->{file});
}
sub parse_macro_args
{
my ($s, $string) = @_;
my $_ = $string;
my @params = ();
while (!/^$/) {
if (s/^\"(.*?)\"\s*//) {
push(@params, $1);
} elsif (s/^(\S+)\s*//) {
push(@params, $1);
}
}
if (@params > 9 and OPENBSD) {
$s->warning("$string holds >9 parameters");
}
return @params;
}
sub set_section_header
{
my ($s, $section_header) = @_;
$section_header = join(' ', $s->parse_macro_args($section_header));
if ($section_header eq 'SEE ALSO') {
$s->{insa} = 1;
} elsif ($s->{insa} == 1) {
if (not $s->{sarest} eq "") {
$s->warning("unneeded characters at end of ",
"SEE ALSO: ", "`$s->{sarest}'") if $opt_a;
# to avoid a second warning at EOF
$s->{sarest} = "";
}
# finished SEE ALSO section
$s->{insa} = 2;
}
if (not $sections{$section_header}) {
$s->warning("unknown section header: ",
"`$section_header'") if $opt_S;
} else {
if ($s->{lastsh} >= $sections{$section_header}) {
$s->warning("section header ",
"`$section_header' in wrong order") if $opt_S;
}
$s->{shseen}->{$section_header} = 1;
$s->{lastsh} = $sections{$section_header};
}
if ($s->{lastline} =~ /^\.Pp/o) {
$s->warning("Paragraph problem: section header after .Pp")
if $opt_P;
}
$s->{current_section_header} = $section_header;
}
sub process_and_save_line
{
my ($s, $_) = @_;
my $result = $s->process_line($_);
# note that process_line chomps \n, then re-adds it,
# so we detect a change on last lines without a \n.
if ($result ne $_) {
$s->{changes} = 1;
}
push(@{$s->{all}}, $result);
}
sub process_line
{
my ($s, $_) = @_;
chomp;
# always cut trailing spaces
if (/\s+$/o) {
$s->warning("trailing space: `$_'") if $opt_s;
s/\s+$//o;
}
if (/\$OpenBSD\b.*\$/o) {
$s->{oxrcsidseen}++;
# nothing else to do
return "$_\n";
}
if (/[\$]NetBSD\b.*\$/o) {
$s->{nxrcsidseen}++;
# nothing else to do
return "$_\n";
}
# comments
if (/^\.\\\"/) {
return "$_\n";
}
if (/^\.TH\s+/o) {
$s->warning("not mandoc") if $opt_m;
$s->{mandoc_p} = 0;
# /^.TH\s*[\w-_".]+\s*([1-9])/;
# $section = $1;
return "$_\n";
}
# if (/^.Dt\s*[\w-_".]+\s*([1-9])/) {
# $section = $1;
# }
if (/^\.Dt\s+/o) {
if (! /^\.Dt\s+(?:[A-Z\d._-]+)\s+$sections_re(?:\s+$arches_re)?$/o) {
$s->warning("bad .Dt: `$_'") if $opt_D;
}
}
if ($s->{mandoc_p}) {
if (/^\.Sh\s+(.*)$/o) {
$s->set_section_header($1);
return "$_\n";
}
} else {
if (/^\.SH\s+(.*)$/o) {
$s->set_section_header($1);
return "$_\n";
}
}
if ($s->{insa} == 1) {
if (/^\.Xr\s+(\S+)\s+($sections_re)\s?(.*)?$/o) {
my ($saname, $sasection, $sarest) = ($1, $2, $3);
$saname =~ s/^\\&//o;
if ($s->{sasection} gt $sasection
or ($s->{sasection} eq $sasection and
(lc($s->{saname}) gt lc($saname)))) {
$s->warning("SEE ALSO: `.Xr $s->{saname} ",
"$s->{sasection}' should be after ",
"`.Xr $saname $sasection'") if $opt_a;
}
if ($s->{sarest} ne ",") {
$s->warning("SEE ALSO: .Xr not separated ",
"by comma, but `$s->{sarest}'") if $opt_a;
}
$s->{saname} = $saname;
$s->{sasection} = $sasection;
$s->{sarest} = $sarest;
}
if (/^\.Rs(?:\s+|$)/o) {
if ($s->{sarest} ne "") {
$s->warning("SEE ALSO: Not necessary to ",
"separate .Xr from .Rs by ",
"`$s->{sarest}'") if $opt_a;
}
$s->{sarest} = "";
}
}
if (/^\.Fn.*,.+/o) {
$s->warning("possible .Fn misuse: `$_'") if $opt_f;
}
if (OPENBSD) {
if (/^(?:[<>])/o or /[^\\][<>]/o) {
$s->warning("use \*(Lt \*(Gt (or .Aq) ",
"instead of < >: `$_'") if $opt_H;
}
}
if (NETBSD) {
if (/^(?:[<>&])/o or /[^\\][<>&]/o) {
$s->warning("use \*[Lt] \*[Gt] (or .Aq) \*[Am] ",
"instead of < > &: `$_'") if $opt_H;
}
}
if (/\b(Free|Net|Open)BSD\b/o
and not /\b(?:www|ftp)\.(?:Free|Net|Open)BSD\.org\b/o
and not /\bOpenBSD\::.*3p\b/o
and not /\/pub\/OpenBSD\//o
and not /\@(?:Free|Net|Open)BSD\.(?i:org)\b/o) {
$s->warning("verbose mention of `$1BSD' instead of "
. "`$short{$1}': `$_'") if $opt_X;
}
if (/^\./o and (/Bx (Open)/o or /Bx (Free)/o or /Bx (Net)/o)) {
$s->warning("`.Bx $1' found -- use $short{$1} instead")
if $opt_X;
}
if (/^\.Os\s+(.+)/o) {
$s->warning(".Os used with argument `$1'") if $opt_o;
}
if (/^\.Nd.*\.$/o) {
$s->warning(".Nd ends with a dot: `$_'") if $opt_n;
}
if (/\w\w\.\s+[A-Z]/o) {
$s->warning("new sentence, new line: `$_'") if $opt_p;
}
if (/^\... .*[^\s][\.();,\[\]\{\}:]$/o
and not /\s\.\.\.$/o and not /\\&.$/o) {
$s->warning("punctuation in format string ",
"without space: `$_'") if $opt_p;
}
if (/^\./o and /Ns [\.();,\[\]\{\}:]/o) {
$s->warning("possible Ns abuse: `$_'") if $opt_p;
}
if (/(\w+)\(\)/o) {
$s->warning("use .Fn or .Xr for functions: `$1()'") if $opt_p;
}
my $destruct = $_;
if ($s->{mandoc_p}) {
$destruct =~ s/\\\&([\w\.])/$1/o;
if ($destruct =~ /^\.Xr\s+([\w\:\.\-\+\/]+)\s+($esections_re)(.*)/o) {
$s->debug("Xref to $1($2) found: `$_'");
$s->verify_xref($1, $2, "", "");
if ($3 =~ /^\S/o) {
$s->warning("No space after section number in Xref: `$_'") if $opt_x;
}
} elsif ($destruct =~ /^\.Xr/o) {
$s->warning("Weird Xref found: `$_'") if $opt_x;
}
} else {
$destruct =~ s/\\f.//go;
if ($destruct !~ /^\.\\\"/o) {
while ($destruct =~ s/([-\w.]+)\s*\(($esections_re)\)//o) {
$s->debug("possible Xref to $1($2) found: `$_'");
$s->verify_xref($1, $2, "possible ", ": `$_'");
# so that we have a chance to find more than one
# per line
$destruct =~ s/(\w+)\s*\(($sections_re)\)//o;
}
}
}
if (/^\.Dd/o and not /^\.Dd\s+$valid_date_re/o) {
$s->warning("Invalid date found: `$_'") if $opt_d;
}
if (/^\.Bd\b.*-literal\b/o) {
$s->{inliteral} = 1;
}
if ($s->{inliteral} == 1) {
if (/^\.Ed\b/o) {
$s->{inliteral} = 0;
}
} elsif (/^$/o) {
$s->warning("Paragraph problem: empty line -- ",
"use .Pp for paragraphs") if $opt_P;
}
if ($s->{lastline} =~ /^\.Pp/o and /^(\.Ss|\.Pp)/o) {
$s->warning("Paragraph problem: $1 after .Pp") if $opt_P;
}
if (/^\.Pp/o and $s->{lastline} =~ /^(\.S[Ssh])/o) {
$s->warning("Paragraph problem: .Pp after $1") if $opt_P;
}
# Check whether the list of possible errors for a function is
# sorted alphabetically.
#
# Error names should not be sorted across different lists.
# (see bind(2) for an example.)
#
/^\.Bl\s+/o and $s->{last_error_name} = "";
if ($s->{current_section_header} eq "ERRORS" and
/^\.It\s+Bq\s+Er\s+(E[\w_]+)$/o) {
my $current_error_name = $1;
if ($s->{last_error_name} eq $current_error_name) {
$s->warning("Duplicate item for ",
$current_error_name, ".") if $opt_e;
} elsif ($current_error_name lt $s->{last_error_name}) {
$s->warning("$s->{last_error_name} and ",
"$current_error_name are not in ",
"alphabetical order.") if $opt_e;
}
$s->{last_error_name} = $current_error_name;
}
$s->{lastline} = $_;
return "$_\n";
}
sub finish
{
my ($s) = @_;
if (NETBSD and not $s->{nxrcsidseen}) {
$s->warning("Missing RCS Id") if $opt_r;
}
if (NETBSD and ($s->{nxrcsidseen} > 1)) {
$s->warning("RCS Id seen twice") if $opt_r;
}
if (OPENBSD and not $s->{oxrcsidseen}) {
$s->warning("Missing RCS Id") if $opt_r;
}
if (OPENBSD and ($s->{oxrcsidseen} > 1)) {
$s->warning("RCS Id seen twice") if $opt_r;
}
if ($s->{lastline} =~ /^\.Pp/o) {
$s->warning("Paragraph problem: .Pp at EOF") if $opt_P;
}
if ($s->{insa} > 0 and not $s->{sarest} eq "") {
$s->warning("unneeded characters at end of SEE ALSO: ",
"`$s->{sarest}'") if $opt_a;
}
# if (not ($fn =~ /$section$/)) {
# $s->warning("section doesn't match (internal value: $section)");
# }
if ($s->{mandoc_p}) {
foreach my $i (qw(NAME SYNOPSIS DESCRIPTION)) {
if (not ($s->{shseen}{$i})) {
$s->warning("missing $i section") if $opt_S;
}
}
}
}
package main;
sub handle_file
{
my $parser = Parser->new($_[0]);
while (my $_ = $parser->next_line) {
$parser->process_and_save_line($_);
}
$parser->finish;
$parser->close;
if ($Parser::opt_F and $parser->{changes}) {
open OUT, ">$_[0].new" or
die "can't open output file `$_[0].new'";
for my $l (@{$parser->{all}}) {
print OUT $l
}
close OUT;
system("mv -i $_[0].new $_[0]");
}
}
Parser->handle_options;
foreach my $file (@ARGV) {
handle_file($file);
}