mk/subst.mk: fix edge case in detection of identity substitutions
In a basic regular expression, a dollar-sign only means end-of-string if it appears at the end of the pattern, or (at the choice of the implementation) at the end of a \(...\) subexpression. This affects the package converters/help2man, which uses a regular expression containing a dollar-sign in a non-final position. Previously, this regular expression was not detected as an identity substitution even though it is one.
This commit is contained in:
parent
b9f83bca93
commit
21aab909de
2 changed files with 31 additions and 7 deletions
|
@ -1,5 +1,5 @@
|
|||
#! /usr/bin/awk -f
|
||||
# $NetBSD: subst-identity.awk,v 1.2 2020/05/06 06:14:56 rillig Exp $
|
||||
# $NetBSD: subst-identity.awk,v 1.3 2020/05/11 19:52:14 rillig Exp $
|
||||
#
|
||||
# Tests whether a sed(1) command line consists of only identity substitutions
|
||||
# like s,id,id,.
|
||||
|
@ -9,13 +9,17 @@
|
|||
|
||||
# Returns the first character of the given regular expression,
|
||||
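# if it starts with a single-character regular expression.
# For an escaped or bracketed character such as \$ or [$], the literal
# character is returned, padded with "x" to the length of the expression.
# Returns "" if s does not start with such an expression.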
|
||||
function identity_char(s) {
|
||||
function identity_char(s, sep, i) {
|
||||
if (s ~ /^[\t -~]/ && s !~ /^[$&*.\[\\\]^]/)
|
||||
return substr(s, 1, 1);
|
||||
if (s ~ /^\\[$*.\[\]^]/)
|
||||
return substr(s, 2, 1) "x";
|
||||
if (s ~ /^\[[$*.]\]/)
|
||||
return substr(s, 2, 1) "xx";
|
||||
if (substr(s, 1, 1) == "$" && substr(s, 2, 1) != sep)
|
||||
return substr(s, 1, 1);
|
||||
if (substr(s, 1, 1) == "^" && i > 3)
|
||||
return substr(s, 1, 1);
|
||||
return "";
|
||||
}
|
||||
|
||||
|
@ -29,7 +33,7 @@ function is_identity_subst(s, len, i, sep, pat_from, pat_to, ch, subst) {
|
|||
i = 3;
|
||||
pat_to = "";
|
||||
while (i < len && substr(s, i, 1) != sep) {
|
||||
ch = identity_char(substr(s, i));
|
||||
ch = identity_char(substr(s, i), sep, i);
|
||||
if (ch == "")
|
||||
break;
|
||||
pat_to = pat_to substr(ch, 1, 1);
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#! /bin/sh
|
||||
# $NetBSD: subst.sh,v 1.35 2020/05/11 19:17:22 rillig Exp $
|
||||
# $NetBSD: subst.sh,v 1.36 2020/05/11 19:52:13 rillig Exp $
|
||||
#
|
||||
# Tests for mk/subst.mk.
|
||||
#
|
||||
|
@ -1219,9 +1219,29 @@ if test_case_begin "identity substitution implementation"; then
|
|||
# See converters/help2man for an example.
|
||||
assert_identity 'yes' -e 's,\$(var),$(var),'
|
||||
|
||||
# An unescaped dollar means end-of-line and cannot be part of an
|
||||
# identity substitution. This may happen, but is clearly a typo.
|
||||
assert_identity 'no' -e 's,$(var),$(var),'
|
||||
# POSIX 2004 and 2018 both define in section "9.3.8 BRE Expression
|
||||
# Anchoring" that a dollar-sign at the end of the pattern means
|
||||
# end-of-string.
|
||||
#
|
||||
# A dollar-sign followed by \) may or may not be an anchor.
|
||||
# In all other cases the dollar is an ordinary character.
|
||||
assert_identity 'yes' -e 's,$(var),$(var),'
|
||||
|
||||
# Since this dollar-sign may or may not be an anchor, treat the
|
||||
# whole regular expression as not-an-identity.
|
||||
#
|
||||
# Since a regular expression with a subexpression must contain
|
||||
# \( and \), it does not count as an identity substitution anyway,
|
||||
# which makes the implementation simple.
|
||||
assert_identity 'no' -e 's,aaa\(aaa$\),aaa\(aaa$\),'
|
||||
|
||||
assert_identity 'yes' -e 's,$a,$a,'
|
||||
assert_identity 'no' -e 's,a$,a$,'
|
||||
|
||||
# Same for the circumflex.
|
||||
assert_identity 'yes' -e 's,a^,a^,'
|
||||
assert_identity 'no' -e 's,^a,^a,'
|
||||
assert_identity 'no' -e 's,\(^aaa\)aaa,\(^aaa\)aaa,'
|
||||
|
||||
test_case_end
|
||||
fi
|
||||
|
|
Loading…
Reference in a new issue