mirror of
https://git.savannah.gnu.org/git/bison.git
synced 2026-03-09 12:23:04 +00:00
doc: explain why location's "column" are defined vaguely
Suggested by Frank Heckenbach. <https://lists.gnu.org/r/bug-bison/2022-01/msg00000.html> * doc/bison.texi (Location Type): Explain why location's "column" are defined vaguely. Show tab handling in ltcalc and calc++. * examples/c/bistromathic/parse.y: Show tab handling. * examples/c++/calc++/calc++.test, * examples/c/bistromathic/bistromathic.test: Check tab handling.
This commit is contained in:
@@ -2365,6 +2365,8 @@ analyzer.
|
|||||||
* Ltcalc Lexer:: The lexical analyzer.
|
* Ltcalc Lexer:: The lexical analyzer.
|
||||||
@end menu
|
@end menu
|
||||||
|
|
||||||
|
See @ref{Tracking Locations} for details about locations.
|
||||||
|
|
||||||
@node Ltcalc Declarations
|
@node Ltcalc Declarations
|
||||||
@subsection Declarations for @code{ltcalc}
|
@subsection Declarations for @code{ltcalc}
|
||||||
|
|
||||||
@@ -2488,7 +2490,7 @@ yylex (void)
|
|||||||
@group
|
@group
|
||||||
/* Skip white space. */
|
/* Skip white space. */
|
||||||
while ((c = getchar ()) == ' ' || c == '\t')
|
while ((c = getchar ()) == ' ' || c == '\t')
|
||||||
++yylloc.last_column;
|
yylloc.last_column += c == '\t' ? 8 - ((yylloc.last_column - 1) & 7) : 1;
|
||||||
@end group
|
@end group
|
||||||
|
|
||||||
@group
|
@group
|
||||||
@@ -4751,6 +4753,33 @@ to 1 for @code{yylloc} at the beginning of the parsing. To initialize
|
|||||||
initialization), use the @code{%initial-action} directive. @xref{Initial
|
initialization), use the @code{%initial-action} directive. @xref{Initial
|
||||||
Action Decl}.
|
Action Decl}.
|
||||||
|
|
||||||
|
@sp 1
|
||||||
|
|
||||||
|
@cindex column
|
||||||
|
The meaning of ``column'' is deliberately left vague since there are several
|
||||||
|
options, depending on the use cases.
|
||||||
|
|
||||||
|
With multibyte input (say UTF-8), simply counting the number of bytes does
|
||||||
|
not match character positions on the screen. One needs advanced functions
|
||||||
|
mapping multibyte characters to their visual width (see for instance
|
||||||
|
Gnulib's @code{mbswidth} and @code{mbsnwidth} functions). Tabulation
|
||||||
|
characters probably need a dedicated implementation, to match the ``go to
|
||||||
|
next multiple of 8'' behavior.
|
||||||
|
|
||||||
|
However, to quote input in error messages, as @command{bison} does:
|
||||||
|
|
||||||
|
@example
|
||||||
|
@group
|
||||||
|
1.10-12: @derror{error}: invalid identifier: ‘3.8’
|
||||||
|
1 | %require @derror{3.8}
|
||||||
|
| @derror{^~~}
|
||||||
|
@end group
|
||||||
|
@end example
|
||||||
|
|
||||||
|
@noindent
|
||||||
|
then byte positions are more handy. So in some cases, tracking both visual
|
||||||
|
character position @emph{and} byte position is the best option. This is
|
||||||
|
what @command{bison} does.
|
||||||
|
|
||||||
@node Actions and Locations
|
@node Actions and Locations
|
||||||
@subsection Actions and Locations
|
@subsection Actions and Locations
|
||||||
@@ -13776,8 +13805,14 @@ the blanks preceding tokens. Comments would be treated equally.
|
|||||||
@example
|
@example
|
||||||
@group
|
@group
|
||||||
%@{
|
%@{
|
||||||
|
// Take 8-space tabulations into account.
|
||||||
|
void add_columns (yy::location& loc, const char *buf, int bufsize)
|
||||||
|
@{
|
||||||
|
for (int i = 0; i < bufsize; ++i)
|
||||||
|
loc.columns (buf[i] == '\t' ? 8 - ((loc.end.column - 1) & 7) : 1);
|
||||||
|
@}
|
||||||
// Code run each time a pattern is matched.
|
// Code run each time a pattern is matched.
|
||||||
# define YY_USER_ACTION loc.columns (yyleng);
|
#define YY_USER_ACTION add_columns (loc, yytext, yyleng);
|
||||||
%@}
|
%@}
|
||||||
@end group
|
@end group
|
||||||
%%
|
%%
|
||||||
|
|||||||
@@ -50,6 +50,21 @@ EOF
|
|||||||
run 1 'err: -:2.1: syntax error, unexpected end of file, expecting ( or identifier or number'
|
run 1 'err: -:2.1: syntax error, unexpected end of file, expecting ( or identifier or number'
|
||||||
|
|
||||||
|
|
||||||
|
# Check handling of tabs.
|
||||||
|
cat >input <<EOF
|
||||||
|
*1
|
||||||
|
EOF
|
||||||
|
run 1 'err: -:1.9: syntax error, unexpected *, expecting ( or identifier or number'
|
||||||
|
cat >input <<EOF
|
||||||
|
*2
|
||||||
|
EOF
|
||||||
|
run 1 'err: -:1.9: syntax error, unexpected *, expecting ( or identifier or number'
|
||||||
|
cat >input <<EOF
|
||||||
|
*3
|
||||||
|
EOF
|
||||||
|
run 1 'err: -:1.9: syntax error, unexpected *, expecting ( or identifier or number'
|
||||||
|
|
||||||
|
|
||||||
# LAC finds many more tokens.
|
# LAC finds many more tokens.
|
||||||
cat >input <<EOF
|
cat >input <<EOF
|
||||||
a := 1
|
a := 1
|
||||||
|
|||||||
@@ -366,3 +366,26 @@ err: 1.15: syntax error: expected - or ( or number or function or variable befor
|
|||||||
err: 1 | (1++2) + 3 + ''
|
err: 1 | (1++2) + 3 + ''
|
||||||
err: | ^
|
err: | ^
|
||||||
'
|
'
|
||||||
|
|
||||||
|
# Check handling of literal tabs. "Escape" them with a C-v, so that
|
||||||
|
# they are not processed as completion requests.
|
||||||
|
cat >input<<EOF
|
||||||
|
*1
|
||||||
|
*2
|
||||||
|
*3
|
||||||
|
EOF
|
||||||
|
# readline processes the tabs itself, and replaces them with spaces.
|
||||||
|
run -n 0 '> *1
|
||||||
|
> *2
|
||||||
|
> *3
|
||||||
|
> ''
|
||||||
|
err: 1.9: syntax error: expected end of file or - or ( or exit or number or function etc., before *
|
||||||
|
err: 1 | *1
|
||||||
|
err: | ^
|
||||||
|
err: 2.9: syntax error: expected end of file or - or ( or exit or number or function etc., before *
|
||||||
|
err: 2 | *2
|
||||||
|
err: | ^
|
||||||
|
err: 3.9: syntax error: expected end of file or - or ( or exit or number or function etc., before *
|
||||||
|
err: 3 | *3
|
||||||
|
err: | ^
|
||||||
|
'
|
||||||
|
|||||||
@@ -308,14 +308,15 @@ yylex (const char **line, YYSTYPE *yylval, YYLTYPE *yylloc,
|
|||||||
{
|
{
|
||||||
int c;
|
int c;
|
||||||
|
|
||||||
// Ignore white space, get first nonwhite character.
|
// Get next character, ignore white spaces.
|
||||||
do {
|
do {
|
||||||
// Move the first position onto the last.
|
// Move the first position onto the last.
|
||||||
yylloc->first_line = yylloc->last_line;
|
yylloc->first_line = yylloc->last_line;
|
||||||
yylloc->first_column = yylloc->last_column;
|
yylloc->first_column = yylloc->last_column;
|
||||||
|
|
||||||
yylloc->last_column += 1;
|
|
||||||
c = *((*line)++);
|
c = *((*line)++);
|
||||||
|
// Tab characters go to the next column multiple of 8.
|
||||||
|
yylloc->last_column +=
|
||||||
|
c == '\t' ? 8 - ((yylloc->last_column - 1) & 7) : 1;
|
||||||
} while (c == ' ' || c == '\t');
|
} while (c == ' ' || c == '\t');
|
||||||
|
|
||||||
switch (c)
|
switch (c)
|
||||||
|
|||||||
Reference in New Issue
Block a user