aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHerbert Xu <herbert@gondor.apana.org.au>2014-09-29 22:52:41 +0800
committerBen Hutchings <ben@decadent.org.uk>2020-03-28 21:42:54 +0000
commit0b425be3b607419cc27bdab4de3f9178b637c7c7 (patch)
treeb5ec814c4669ec56b89935b37da36c519f12dd23
parentd23eb0c786db8c7816a30978609ce6351e3b8583 (diff)
downloadklibc-0b425be3b607419cc27bdab4de3f9178b637c7c7.tar.gz
[klibc] dash: [PARSER] Handle backslash newlines properly after dollar sign
[ dash commit ef91d3d6a4c39421fd3a391e02cd82f9f3aee4a8 ] On Tue, Aug 26, 2014 at 12:34:42PM +0000, Eric Blake wrote: > On 08/26/2014 06:15 AM, Oleg Bulatov wrote: > > Hi! > > > > While playing with sh generators I found that dash and bash have different > > interpretations for <slash><newline> sequence. > > > > $ dash -c 'EDIT=xxx; echo $EDIT\ > >> OR' > > xxxOR > > Buggy. > > > $ bash -c 'EDIT=xxx; echo $EDIT\ > > OR' > > /usr/bin/vim > > Correct behavior. > > > > > $ dash -c 'echo "$\ > > (pwd)"' > > $(pwd) > > > > Is it undefined behaviour in POSIX? > > No, it's well-defined, and dash is buggy. POSIX says: > > http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_03 > > "the shell shall break its input into tokens by applying the first > applicable rule below to the next character in its input" > > Rule 4 covers backslash handling, while rule 5 covers locating the end > of a word to be subject to $ expansion. Therefore, rule 4 should happen > first. Rule 4 defers to the section on quoting, with the caveat that > <newline> joining is the only substitution that happens immediately as > part of the parsing: > > http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02 > > "If a <newline> follows the <backslash>, the shell shall interpret this > as line continuation. The <backslash> and <newline> shall be removed > before splitting the input into tokens. Since the escaped <newline> is > removed entirely from the input and is not replaced by any white space, > it cannot serve as a token separator." > > So the fact that dash is treating the elided backslash-newline as a > token separator, and parsing your input as if ${EDIT}OR instead of > ${EDITOR} is a bug in dash. I agree. This patch should resolve this problem and similar ones affecting backslash newlines after we encounter a dollar sign. Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
-rw-r--r--usr/dash/parser.c36
1 files changed, 27 insertions, 9 deletions
diff --git a/usr/dash/parser.c b/usr/dash/parser.c
index c4eaae2b93d55..2b07437e99751 100644
--- a/usr/dash/parser.c
+++ b/usr/dash/parser.c
@@ -827,6 +827,24 @@ breakloop:
#undef RETURN
}
+static int pgetc_eatbnl(void) /* pgetc() wrapper that swallows backslash-newline pairs before returning a character */
+{
+ int c;
+
+ while ((c = pgetc()) == '\\') { /* keep looping only while the character just read is a backslash */
+ if (pgetc() != '\n') { /* backslash not followed by a newline: not a line continuation */
+ pungetc(); /* presumably un-reads the look-ahead character; c still holds the backslash */
+ break;
+ }
+
+ plinno++; /* a newline was consumed (plinno is presumably the parser's line counter -- confirm) */
+ if (doprompt)
+ setprompt(2); /* NOTE(review): 2 presumably selects the continuation prompt -- confirm */
+ }
+
+ return c; /* first character not part of a backslash-newline pair; a lone backslash is returned as-is */
+}
+
/*
@@ -1179,7 +1197,7 @@ parsesub: {
char *p;
static const char types[] = "}-+?=";
- c = pgetc();
+ c = pgetc_eatbnl();
if (
(checkkwd & CHKEOFMARK) ||
c <= PEOA ||
@@ -1188,7 +1206,7 @@ parsesub: {
USTPUTC('$', out);
pungetc();
} else if (c == '(') { /* $(command) or $((arith)) */
- if (pgetc() == '(') {
+ if (pgetc_eatbnl() == '(') {
PARSEARITH();
} else {
pungetc();
@@ -1200,25 +1218,25 @@ parsesub: {
STADJUST(1, out);
subtype = VSNORMAL;
if (likely(c == '{')) {
- c = pgetc();
+ c = pgetc_eatbnl();
subtype = 0;
}
varname:
if (is_name(c)) {
do {
STPUTC(c, out);
- c = pgetc();
+ c = pgetc_eatbnl();
} while (is_in_name(c));
} else if (is_digit(c)) {
do {
STPUTC(c, out);
- c = pgetc();
+ c = pgetc_eatbnl();
} while (is_digit(c));
}
else if (is_special(c)) {
int cc = c;
- c = pgetc();
+ c = pgetc_eatbnl();
if (!subtype && cc == '#') {
subtype = VSLENGTH;
@@ -1227,7 +1245,7 @@ varname:
goto varname;
cc = c;
- c = pgetc();
+ c = pgetc_eatbnl();
if (cc == '}' || c != '}') {
pungetc();
subtype = 0;
@@ -1245,7 +1263,7 @@ varname:
switch (c) {
case ':':
subtype = VSNUL;
- c = pgetc();
+ c = pgetc_eatbnl();
/*FALLTHROUGH*/
default:
p = strchr(types, c);
@@ -1259,7 +1277,7 @@ varname:
int cc = c;
subtype = c == '#' ? VSTRIMLEFT :
VSTRIMRIGHT;
- c = pgetc();
+ c = pgetc_eatbnl();
if (c == cc)
subtype++;
else