jobcore/bash/bash-5.2_p32-read-delimiter-in-invalid-mbchar.patch
2024-08-27 12:22:15 +03:00

297 lines
7.5 KiB
Diff

From 0432ec33408ac124b620c44416c9c58f0c10b63b Mon Sep 17 00:00:00 2001
From: Kerin Millar <kfm@plushkava.net>
Date: Fri, 23 Aug 2024 04:14:36 +0100
Subject: [PATCH] Backport fix for issue with read delimiter in invalid
multibyte char
This addresses a regression introduced by 5.0. Consider the following
test case.
for i in {194..245}; do printf -v o %o "$i"; printf "\\$o\\n"; done |
while read -r; do declare -p REPLY; done
BEFORE
declare -- REPLY=$'\302\n\303\n\304\n\305\n\306\n\307\n\310\n\311\n\312\
n\313\n\314\n\315\n\316\n\317\n\320\n\321\n\322\n\323\n\324\n\325\n\326\
n\327\n\330\n\331\n\332\n\333\n\334\n\335\n\336\n\337\n\340\n\341\n\342\
n\343\n\344\n\345\n\346\n\347\n\350\n\351\n\352\n\353\n\354\n\355\n\356\
n\357\n\360\n\361\n\362\n\363\n\364\n\365'
AFTER
declare -- REPLY=$'\302'
declare -- REPLY=$'\303'
declare -- REPLY=$'\304'
declare -- REPLY=$'\305'
declare -- REPLY=$'\306'
declare -- REPLY=$'\307'
declare -- REPLY=$'\310'
declare -- REPLY=$'\311'
declare -- REPLY=$'\312'
declare -- REPLY=$'\313'
declare -- REPLY=$'\314'
declare -- REPLY=$'\315'
declare -- REPLY=$'\316'
declare -- REPLY=$'\317'
declare -- REPLY=$'\320'
declare -- REPLY=$'\321'
declare -- REPLY=$'\322'
declare -- REPLY=$'\323'
declare -- REPLY=$'\324'
declare -- REPLY=$'\325'
declare -- REPLY=$'\326'
declare -- REPLY=$'\327'
declare -- REPLY=$'\330'
declare -- REPLY=$'\331'
declare -- REPLY=$'\332'
declare -- REPLY=$'\333'
declare -- REPLY=$'\334'
declare -- REPLY=$'\335'
declare -- REPLY=$'\336'
declare -- REPLY=$'\337'
declare -- REPLY=$'\340'
declare -- REPLY=$'\341'
declare -- REPLY=$'\342'
declare -- REPLY=$'\343'
declare -- REPLY=$'\344'
declare -- REPLY=$'\345'
declare -- REPLY=$'\346'
declare -- REPLY=$'\347'
declare -- REPLY=$'\350'
declare -- REPLY=$'\351'
declare -- REPLY=$'\352'
declare -- REPLY=$'\353'
declare -- REPLY=$'\354'
declare -- REPLY=$'\355'
declare -- REPLY=$'\356'
declare -- REPLY=$'\357'
declare -- REPLY=$'\360'
declare -- REPLY=$'\361'
declare -- REPLY=$'\362'
declare -- REPLY=$'\363'
declare -- REPLY=$'\364'
declare -- REPLY=$'\365'
Signed-off-by: Kerin Millar <kfm@plushkava.net>
---
builtins/read.def | 25 ++++++++++++----
externs.h | 1 +
lib/sh/zread.c | 74 +++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 94 insertions(+), 6 deletions(-)
diff --git builtins/read.def builtins/read.def
index ddd91d32..53b4bd81 100644
--- builtins/read.def
+++ builtins/read.def
@@ -130,7 +130,7 @@ static void set_readline_timeout PARAMS((sh_timer *t, time_t, long));
#endif
static SHELL_VAR *bind_read_variable PARAMS((char *, char *, int));
#if defined (HANDLE_MULTIBYTE)
-static int read_mbchar PARAMS((int, char *, int, int, int));
+static int read_mbchar PARAMS((int, char *, int, int, int, int));
#endif
static void ttyrestore PARAMS((struct ttsave *));
@@ -806,7 +806,7 @@ add_char:
else
# endif
if (locale_utf8locale == 0 || ((c & 0x80) != 0))
- i += read_mbchar (fd, input_string, i, c, unbuffered_read);
+ i += read_mbchar (fd, input_string, i, c, delim, unbuffered_read);
}
#endif
@@ -1064,10 +1064,10 @@ bind_read_variable (name, value, flags)
#if defined (HANDLE_MULTIBYTE)
static int
-read_mbchar (fd, string, ind, ch, unbuffered)
+read_mbchar (fd, string, ind, ch, delim, unbuffered)
int fd;
char *string;
- int ind, ch, unbuffered;
+ int ind, ch, delim, unbuffered;
{
char mbchar[MB_LEN_MAX + 1];
int i, n, r;
@@ -1101,8 +1101,21 @@ read_mbchar (fd, string, ind, ch, unbuffered)
mbchar[i++] = c;
continue;
}
- else if (ret == (size_t)-1 || ret == (size_t)0 || ret > (size_t)0)
- break;
+ else if (ret == (size_t)-1)
+ {
+ /* If we read a delimiter character that makes this an invalid
+ multibyte character, we can't just add it to the input string
+ and treat it as a byte. We need to push it back so a subsequent
+ zread will pick it up. */
+ if (c == delim)
+ {
+ zungetc (c);
+ mbchar[--i] = '\0'; /* unget the delimiter */
+ }
+ break; /* invalid multibyte character */
+ }
+ else if (ret == (size_t)0 || ret > (size_t)0)
+ break; /* valid multibyte character */
}
mbchar_return:
diff --git externs.h externs.h
index 931dba9c..1b70a13b 100644
--- externs.h
+++ externs.h
@@ -536,6 +536,7 @@ extern ssize_t zreadintr PARAMS((int, char *, size_t));
extern ssize_t zreadc PARAMS((int, char *));
extern ssize_t zreadcintr PARAMS((int, char *));
extern ssize_t zreadn PARAMS((int, char *, size_t));
+extern int zungetc PARAMS((int));
extern void zreset PARAMS((void));
extern void zsyncfd PARAMS((int));
diff --git lib/sh/zread.c lib/sh/zread.c
index dafb7f60..7cfbb288 100644
--- lib/sh/zread.c
+++ lib/sh/zread.c
@@ -41,6 +41,10 @@ extern int errno;
# define ZBUFSIZ 4096
#endif
+#ifndef EOF
+# define EOF -1
+#endif
+
extern int executing_builtin;
extern void check_signals_and_traps (void);
@@ -48,6 +52,11 @@ extern void check_signals (void);
extern int signal_is_trapped (int);
extern int read_builtin_timeout (int);
+int zungetc (int);
+
+/* Provide one character of pushback whether we are using read or zread. */
+static int zpushedchar = -1;
+
/* Read LEN bytes from FD into BUF. Retry the read on EINTR. Any other
error causes the loop to break. */
ssize_t
@@ -59,6 +68,15 @@ zread (fd, buf, len)
ssize_t r;
check_signals (); /* check for signals before a blocking read */
+
+ /* If we pushed a char back, return it immediately */
+ if (zpushedchar != -1)
+ {
+ *buf = (unsigned char)zpushedchar;
+ zpushedchar = -1;
+ return 1;
+ }
+
/* should generalize into a mechanism where different parts of the shell can
`register' timeouts and have them checked here. */
while (((r = read_builtin_timeout (fd)) < 0 || (r = read (fd, buf, len)) < 0) &&
@@ -95,6 +113,14 @@ zreadretry (fd, buf, len)
ssize_t r;
int nintr;
+ /* If we pushed a char back, return it immediately */
+ if (zpushedchar != -1)
+ {
+ *buf = (unsigned char)zpushedchar;
+ zpushedchar = -1;
+ return 1;
+ }
+
for (nintr = 0; ; )
{
r = read (fd, buf, len);
@@ -118,6 +144,15 @@ zreadintr (fd, buf, len)
size_t len;
{
check_signals ();
+
+ /* If we pushed a char back, return it immediately */
+ if (zpushedchar != -1)
+ {
+ *buf = (unsigned char)zpushedchar;
+ zpushedchar = -1;
+ return 1;
+ }
+
return (read (fd, buf, len));
}
@@ -135,6 +170,14 @@ zreadc (fd, cp)
{
ssize_t nr;
+ /* If we pushed a char back, return it immediately */
+ if (zpushedchar != -1 && cp)
+ {
+ *cp = (unsigned char)zpushedchar;
+ zpushedchar = -1;
+ return 1;
+ }
+
if (lind == lused || lused == 0)
{
nr = zread (fd, lbuf, sizeof (lbuf));
@@ -160,6 +203,14 @@ zreadcintr (fd, cp)
{
ssize_t nr;
+ /* If we pushed a char back, return it immediately */
+ if (zpushedchar != -1 && cp)
+ {
+ *cp = (unsigned char)zpushedchar;
+ zpushedchar = -1;
+ return 1;
+ }
+
if (lind == lused || lused == 0)
{
nr = zreadintr (fd, lbuf, sizeof (lbuf));
@@ -186,6 +237,13 @@ zreadn (fd, cp, len)
{
ssize_t nr;
+ if (zpushedchar != -1 && cp)
+ {
+ *cp = zpushedchar;
+ zpushedchar = -1;
+ return 1;
+ }
+
if (lind == lused || lused == 0)
{
if (len > sizeof (lbuf))
@@ -204,6 +262,22 @@ zreadn (fd, cp, len)
return 1;
}
+int
+zungetc (c)
+ int c;
+{
+ if (zpushedchar == -1)
+ {
+ zpushedchar = c;
+ return c;
+ }
+
+ if (c == EOF || lind == 0)
+ return (EOF);
+ lbuf[--lind] = c; /* XXX - pushback slot already in use: back up lind and overwrite lbuf */
+ return c;
+}
+
void
zreset ()
{
--
2.45.2