mirror of
https://git.savannah.gnu.org/git/bison.git
synced 2026-03-13 14:23:04 +00:00
scan: fix typo in UTF-8 escape
We had:
```
-mbchar ...|\xF0[\x\90-\xBF]([\x80-\xBF]{2})|...
+mbchar ...|\xF0[\x90-\xBF]([\x80-\xBF]{2})|...
```
so a precise sequence that matches the incorrect regex can let NUL
bytes pass through, which triggers an assertion violation downstream.
It is a pity that Flex does not report an error for such input.
Reported by Ahcheong Lee <ahcheong.lee@gmail.com>.
<https://lists.gnu.org/r/bug-bison/2021-04/msg00003.html>
* src/scan-gram.l (mbchar): Fix the bad regex.
* tests/input.at (Invalid inputs): Check that case.
This commit is contained in:
@@ -160,7 +160,7 @@ xint 0[xX][0-9abcdefABCDEF]+
|
||||
eol \n|\r\n
|
||||
|
||||
/* UTF-8 Encoded Unicode Code Point, from Flex's documentation. */
|
||||
mbchar [\x09\x0A\x0D\x20-\x7E]|[\xC2-\xDF][\x80-\xBF]|\xE0[\xA0-\xBF][\x80-\xBF]|[\xE1-\xEC\xEE\xEF]([\x80-\xBF]{2})|\xED[\x80-\x9F][\x80-\xBF]|\xF0[\x\90-\xBF]([\x80-\xBF]{2})|[\xF1-\xF3]([\x80-\xBF]{3})|\xF4[\x80-\x8F]([\x80-\xBF]{2})
|
||||
mbchar [\x09\x0A\x0D\x20-\x7E]|[\xC2-\xDF][\x80-\xBF]|\xE0[\xA0-\xBF][\x80-\xBF]|[\xE1-\xEC\xEE\xEF]([\x80-\xBF]{2})|\xED[\x80-\x9F][\x80-\xBF]|\xF0[\x90-\xBF]([\x80-\xBF]{2})|[\xF1-\xF3]([\x80-\xBF]{3})|\xF4[\x80-\x8F]([\x80-\xBF]{2})
|
||||
|
||||
/* Zero or more instances of backslash-newline. Following GCC, allow
|
||||
white space between the backslash and the newline. */
|
||||
|
||||
Reference in New Issue
Block a user