mirror of
https://git.savannah.gnu.org/git/bison.git
synced 2026-03-10 12:53:03 +00:00
diagnostics: fix the handling of multibyte characters
This is a pity: effort was invested in correctly computing the number of screen columns consumed by multibyte characters, but the routines that do that were fed single-byte inputs... As a consequence, Bison never displayed locations correctly when there were multibyte characters. * src/scan-gram.l (mbchar): New. Use it instead of . in the catch-all clause. * tests/diagnostics.at (Tabulations): Enhance into... (Tabulations and multibyte characters): this.
This commit is contained in:
@@ -135,11 +135,13 @@ static void unexpected_newline (boundary, char const *);
|
||||
%x SC_BRACKETED_ID SC_RETURN_BRACKETED_ID
|
||||
|
||||
letter [.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]
|
||||
notletter [^.abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_]{-}[%\{]
|
||||
id {letter}({letter}|[-0-9])*
|
||||
int [0-9]+
|
||||
xint 0[xX][0-9abcdefABCDEF]+
|
||||
|
||||
/* UTF-8 Encoded Unicode Code Point, from Flex's documentation. */
|
||||
/* One well-formed UTF-8 sequence per match.  Alternatives, in order:
   tab/LF/CR and printable ASCII; two-byte sequences (lead C2-DF);
   three-byte sequences, with E0 restricted to A0-BF (no overlong
   forms) and ED restricted to 80-9F (no surrogates); four-byte
   sequences, with F0 restricted to 90-BF (no overlong forms) and F4
   restricted to 80-8F (nothing above U+10FFFF).  */
mbchar [\x09\x0A\x0D\x20-\x7E]|[\xC2-\xDF][\x80-\xBF]|\xE0[\xA0-\xBF][\x80-\xBF]|[\xE1-\xEC\xEE\xEF]([\x80-\xBF]{2})|\xED[\x80-\x9F][\x80-\xBF]|\xF0[\x90-\xBF]([\x80-\xBF]{2})|[\xF1-\xF3]([\x80-\xBF]{3})|\xF4[\x80-\x8F]([\x80-\xBF]{2})
|
||||
|
||||
/* Zero or more instances of backslash-newline. Following GCC, allow
|
||||
white space between the backslash and the newline. */
|
||||
splice (\\[ \f\t\v]*\n)*
|
||||
@@ -790,8 +792,16 @@ eqopt ({sp}=)?
|
||||
| By default, grow the string obstack with the input. |
|
||||
`-----------------------------------------------------*/
|
||||
|
||||
<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
|
||||
<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW;
|
||||
<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
|
||||
{
|
||||
/* Accept multibyte characters in one block instead of byte after
|
||||
byte, so that add_column_width and mbsnwidth can compute correct
|
||||
screen width.
|
||||
|
||||
Add a fallthrough "|." so that non UTF-8 input is still accepted
|
||||
and does not jam the scanner. */
|
||||
{mbchar}|. STRING_GROW;
|
||||
}
|
||||
|
||||
%%
|
||||
|
||||
|
||||
@@ -17,22 +17,31 @@
|
||||
|
||||
AT_BANNER([[Diagnostics.]])
|
||||
|
||||
|
||||
# AT_TEST([TITLE], [GRAMMAR], [OUTPUT-WITH-STYLE])
|
||||
# ------------------------------------------------
|
||||
m4_pushdef([AT_TEST],
|
||||
[
|
||||
AT_SETUP([$1])
|
||||
AT_KEYWORDS([diagnostics])
|
||||
|
||||
# We need UTF-8 support for correct screen-width computation of UTF-8
|
||||
# characters. Skip the test if not available.
|
||||
AT_SKIP_IF([! locale -a | grep '^en_US.UTF-8$'])
|
||||
|
||||
AT_BISON_OPTION_PUSHDEFS
|
||||
|
||||
AT_DATA_GRAMMAR([[input.y]], [$2])
|
||||
|
||||
AT_DATA([experr], [$3])
|
||||
AT_BISON_CHECK([-fcaret --style=debug -Wall input.y], [], [], [experr])
|
||||
# Cannot use AT_BISON_CHECK easily as we need to change the
|
||||
# environment.
|
||||
# FIXME: Enhance AT_BISON_CHECK.
|
||||
AT_CHECK([LC_ALL=en_US.UTF-8 bison -fcaret --style=debug -Wall input.y], [], [], [experr])
|
||||
|
||||
# When no style, same messages, except the style.
|
||||
# When no style, same messages, but without style.
|
||||
AT_CHECK([perl -pi -e 's{</?\w+>}{}g' experr])
|
||||
AT_BISON_CHECK([-fcaret -Wall input.y], [], [], [experr])
|
||||
AT_CHECK([LC_ALL=en_US.UTF-8 bison -fcaret -Wall input.y], [], [], [experr])
|
||||
|
||||
AT_BISON_OPTION_POPDEFS
|
||||
|
||||
@@ -106,18 +115,24 @@ input.y:17.2: <warning>warning:</warning> empty rule without %empty [<warning>-W
|
||||
]])
|
||||
|
||||
|
||||
## ------------- ##
|
||||
## Tabulations. ##
|
||||
## ------------- ##
|
||||
## -------------------------------------- ##
|
||||
## Tabulations and multibyte characters. ##
|
||||
## -------------------------------------- ##
|
||||
|
||||
# Make sure we treat tabulations as eight spaces.
|
||||
# Make sure we treat tabulations as eight spaces, and that multibyte
|
||||
# characters have correct width.
|
||||
|
||||
AT_TEST([[Tabulations]],
|
||||
AT_TEST([[Tabulations and multibyte characters]],
|
||||
[[%%
|
||||
exp: a b c
|
||||
exp: a b c d e f g h
|
||||
a: { }
|
||||
b: { }
|
||||
c: {------------}
|
||||
d: {éééééééééééé}
|
||||
e: {∇⃗×𝐸⃗ = -∂𝐵⃗/∂t}
|
||||
f: { 42 }
|
||||
g: { "฿¥$€₦" }
|
||||
h: { 🐃 }
|
||||
]],
|
||||
[[input.y:11.4-17: <warning>warning:</warning> empty rule without %empty [<warning>-Wempty-rule</warning>]
|
||||
a: <warning>{ }</warning>
|
||||
@@ -128,6 +143,21 @@ input.y:12.4-17: <warning>warning:</warning> empty rule without %empty [<warning
|
||||
input.y:13.4-17: <warning>warning:</warning> empty rule without %empty [<warning>-Wempty-rule</warning>]
|
||||
c: <warning>{------------}</warning>
|
||||
<warning>^~~~~~~~~~~~~~</warning>
|
||||
input.y:14.4-17: <warning>warning:</warning> empty rule without %empty [<warning>-Wempty-rule</warning>]
|
||||
d: <warning>{éééééééééééé}</warning>
|
||||
<warning>^~~~~~~~~~~~~~</warning>
|
||||
input.y:15.4-17: <warning>warning:</warning> empty rule without %empty [<warning>-Wempty-rule</warning>]
|
||||
e: <warning>{∇⃗×𝐸⃗ = -∂𝐵⃗/∂t}</warning>
|
||||
<warning>^~~~~~~~~~~~~~</warning>
|
||||
input.y:16.4-17: <warning>warning:</warning> empty rule without %empty [<warning>-Wempty-rule</warning>]
|
||||
f: <warning>{ 42 }</warning>
|
||||
<warning>^~~~~~~~~~~~~~</warning>
|
||||
input.y:17.4-17: <warning>warning:</warning> empty rule without %empty [<warning>-Wempty-rule</warning>]
|
||||
g: <warning>{ "฿¥$€₦" }</warning>
|
||||
<warning>^~~~~~~~~~~~~~</warning>
|
||||
input.y:18.4-17: <warning>warning:</warning> empty rule without %empty [<warning>-Wempty-rule</warning>]
|
||||
h: <warning>{ 🐃 }</warning>
|
||||
<warning>^~~~~~~~~~~~~~</warning>
|
||||
]])
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user