mirror of
https://git.savannah.gnu.org/git/bison.git
synced 2026-03-21 10:13:03 +00:00
scan: fix typo in UTF-8 escape
We had a stray backslash in the \xF0 branch of mbchar ([\x\90-\xBF] instead of [\x90-\xBF]); the fix is:
```
-mbchar ...|\xF0[\x\90-\xBF]([\x80-\xBF]{2})|...
+mbchar ...|\xF0[\x90-\xBF]([\x80-\xBF]{2})|...
```
so a specially crafted byte sequence that matches the incorrect regex can let
NUL bytes pass through the scanner, which triggers an assertion violation downstream.
It is a pity that Flex does not report an error for such input.
Reported by Ahcheong Lee <ahcheong.lee@gmail.com>.
<https://lists.gnu.org/r/bug-bison/2021-04/msg00003.html>
* src/scan-gram.l (mbchar): Fix the bad regex.
* tests/input.at (Invalid inputs): Check that case.
This commit is contained in:
@@ -160,7 +160,7 @@ xint 0[xX][0-9abcdefABCDEF]+
|
|||||||
eol \n|\r\n
|
eol \n|\r\n
|
||||||
|
|
||||||
/* UTF-8 Encoded Unicode Code Point, from Flex's documentation. */
|
/* UTF-8 Encoded Unicode Code Point, from Flex's documentation. */
|
||||||
mbchar [\x09\x0A\x0D\x20-\x7E]|[\xC2-\xDF][\x80-\xBF]|\xE0[\xA0-\xBF][\x80-\xBF]|[\xE1-\xEC\xEE\xEF]([\x80-\xBF]{2})|\xED[\x80-\x9F][\x80-\xBF]|\xF0[\x\90-\xBF]([\x80-\xBF]{2})|[\xF1-\xF3]([\x80-\xBF]{3})|\xF4[\x80-\x8F]([\x80-\xBF]{2})
|
mbchar [\x09\x0A\x0D\x20-\x7E]|[\xC2-\xDF][\x80-\xBF]|\xE0[\xA0-\xBF][\x80-\xBF]|[\xE1-\xEC\xEE\xEF]([\x80-\xBF]{2})|\xED[\x80-\x9F][\x80-\xBF]|\xF0[\x90-\xBF]([\x80-\xBF]{2})|[\xF1-\xF3]([\x80-\xBF]{3})|\xF4[\x80-\x8F]([\x80-\xBF]{2})
|
||||||
|
|
||||||
/* Zero or more instances of backslash-newline. Following GCC, allow
|
/* Zero or more instances of backslash-newline. Following GCC, allow
|
||||||
white space between the backslash and the newline. */
|
white space between the backslash and the newline. */
|
||||||
|
|||||||
@@ -83,7 +83,8 @@ AT_CLEANUP
|
|||||||
AT_SETUP([Invalid inputs])
|
AT_SETUP([Invalid inputs])
|
||||||
|
|
||||||
AT_DATA([input.y],
|
AT_DATA([input.y],
|
||||||
[[\000\001\002\377?
|
[[%header "\360\000\200\210"
|
||||||
|
\000\001\002\377?
|
||||||
"\000"
|
"\000"
|
||||||
%%
|
%%
|
||||||
?
|
?
|
||||||
@@ -98,37 +99,41 @@ AT_PERL_REQUIRE([[-pi -e 's/\\(\d{3})/chr(oct($1))/ge' input.y]])
|
|||||||
AT_BISON_CHECK([-fcaret input.y], [1], [], [stderr])
|
AT_BISON_CHECK([-fcaret input.y], [1], [], [stderr])
|
||||||
|
|
||||||
# Autotest's diffing, when there are NUL bytes, just reports "binary
|
# Autotest's diffing, when there are NUL bytes, just reports "binary
|
||||||
# files differ". So don't leave NUL bytes.
|
# files differ". So don't leave NUL bytes. And don't leave invalid
|
||||||
AT_PERL_CHECK([[-p -e 's{([\0\377])}{sprintf "\\x%02x", ord($1)}ge' stderr]], [],
|
# mbchars either: escape raw binary.
|
||||||
[[input.y:1.1-2: error: invalid characters: '\0\001\002\377?'
|
AT_PERL_CHECK([[-p -e 's{([\0\200\210\360\377])}{sprintf "\\x%02x", ord($1)}ge' stderr]], [],
|
||||||
1 | \x00\xff?
|
[[input.y:1.11: error: invalid null character
|
||||||
|
1 | %header "\xf0\x00\x80\x88"
|
||||||
|
| ^
|
||||||
|
input.y:2.1-2: error: invalid characters: '\0\001\002\377?'
|
||||||
|
2 | \x00\xff?
|
||||||
| ^~
|
| ^~
|
||||||
input.y:2.2: error: invalid null character
|
input.y:3.2: error: invalid null character
|
||||||
2 | "\x00"
|
3 | "\x00"
|
||||||
| ^
|
| ^
|
||||||
input.y:4.1: error: invalid character: '?'
|
input.y:5.1: error: invalid character: '?'
|
||||||
4 | ?
|
5 | ?
|
||||||
| ^
|
| ^
|
||||||
input.y:5.14: error: invalid character: '}'
|
input.y:6.14: error: invalid character: '}'
|
||||||
5 | default: 'a' }
|
6 | default: 'a' }
|
||||||
| ^
|
| ^
|
||||||
input.y:6.1: error: invalid character: '%'
|
input.y:7.1: error: invalid character: '%'
|
||||||
6 | %&
|
7 | %&
|
||||||
| ^
|
| ^
|
||||||
input.y:6.2: error: invalid character: '&'
|
input.y:7.2: error: invalid character: '&'
|
||||||
6 | %&
|
7 | %&
|
||||||
| ^
|
| ^
|
||||||
input.y:7.1-17: error: invalid directive: '%a-does-not-exist'
|
input.y:8.1-17: error: invalid directive: '%a-does-not-exist'
|
||||||
7 | %a-does-not-exist
|
8 | %a-does-not-exist
|
||||||
| ^~~~~~~~~~~~~~~~~
|
| ^~~~~~~~~~~~~~~~~
|
||||||
input.y:8.1: error: invalid character: '%'
|
input.y:9.1: error: invalid character: '%'
|
||||||
8 | %-
|
9 | %-
|
||||||
| ^
|
| ^
|
||||||
input.y:8.2: error: invalid character: '-'
|
input.y:9.2: error: invalid character: '-'
|
||||||
8 | %-
|
9 | %-
|
||||||
| ^
|
| ^
|
||||||
input.y:9.1-10.0: error: missing '%}' at end of file
|
input.y:10.1-11.0: error: missing '%}' at end of file
|
||||||
9 | %{
|
10 | %{
|
||||||
| ^~
|
| ^~
|
||||||
]])
|
]])
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user