From fec13ce2db675d18afb887e3c98f347a2b3de31e Mon Sep 17 00:00:00 2001 From: Akim Demaille Date: Wed, 2 Oct 2019 08:57:58 +0200 Subject: [PATCH] diagnostics: sort symbols per location Because the checking of the grammar is done in phases after the whole grammar was read, we sometimes have diagnostics that look weird. In some cases, within one type of checking, the entities are not checked in the order in which they appear in the file. For instance, checking symbols is done on the list of symbols sorted by tag: foo.y:1.20-22: warning: symbol BAR is used, but is not defined as a token and has no rules [-Wother] 1 | %destructor {} QUX BAR | ^~~ foo.y:1.16-18: warning: symbol QUX is used, but is not defined as a token and has no rules [-Wother] 1 | %destructor {} QUX BAR | ^~~ Let's sort them by location instead: foo.y:1.16-18: warning: symbol 'QUX' is used, but is not defined as a token and has no rules [-Wother] 1 | %destructor {} QUX BAR | ^~~ foo.y:1.20-22: warning: symbol 'BAR' is used, but is not defined as a token and has no rules [-Wother] 1 | %destructor {} QUX BAR | ^~~ * src/location.h (boundary_cmp): Be robust to missing file names. * src/symtab.c (symbol_cmp): Sort by location. * tests/diagnostics.at, tests/input.at: Adjust expectations. --- src/location.h | 7 ++++++- src/symtab.c | 17 ++++++++--------- tests/diagnostics.at | 8 ++++---- tests/input.at | 16 ++++++++-------- 4 files changed, 26 insertions(+), 22 deletions(-) diff --git a/src/location.h b/src/location.h index 8eb7c472..cbd6f482 100644 --- a/src/location.h +++ b/src/location.h @@ -71,7 +71,12 @@ boundary_set (boundary *p, const char *f, int l, int c, int b) static inline int boundary_cmp (boundary a, boundary b) { - int res = strcmp (a.file, b.file); + /* Locations with no file first. */ + int res = + a.file && b.file ? strcmp (a.file, b.file) + : a.file ? 1 + : b.file ? 
-1 + : 0; if (!res) res = a.line - b.line; if (!res) diff --git a/src/symtab.c b/src/symtab.c index b3642e67..f2454725 100644 --- a/src/symtab.c +++ b/src/symtab.c @@ -925,8 +925,8 @@ symbols_free (void) static int symbol_cmp (void const *a, void const *b) { - return strcmp ((*(symbol * const *)a)->tag, - (*(symbol * const *)b)->tag); + return location_cmp ((*(symbol * const *)a)->location, + (*(symbol * const *)b)->location); } /* Store in *SORTED an array of pointers to the symbols contained in @@ -952,14 +952,13 @@ void symbols_check_defined (void) { table_sort (symbol_table, &symbols_sorted); - /* semantic_type, like symbol, starts with a 'tag' field. And here - we only deal with arrays/hashes of pointers, sizeof is not an - issue. + /* semantic_type, like symbol, starts with a 'tag' field and then a + 'location' field. And here we only deal with arrays/hashes of + pointers, sizeof is not an issue. So instead of implementing table_sort (and symbol_cmp) once for each type, let's lie a bit to the typing system, and treat - 'semantic_type' as if it were 'symbol'. Anyway this is only - about arrays of pointers. */ + 'semantic_type' as if it were 'symbol'. */ table_sort (semantic_type_table, (symbol ***) &semantic_types_sorted); for (int i = 0; symbols_sorted[i]; ++i) @@ -1019,7 +1018,7 @@ symbols_token_translations_init (void) for (int i = 0; i < max_user_token_number + 1; ++i) token_translations[i] = undeftoken->content->number; for (int i = 0; symbols_sorted[i]; ++i) - symbol_translation(symbols_sorted[i]); + symbol_translation (symbols_sorted[i]); } @@ -1033,7 +1032,7 @@ symbols_pack (void) { symbols = xcalloc (nsyms, sizeof *symbols); for (int i = 0; symbols_sorted[i]; ++i) - symbol_pack(symbols_sorted[i]); + symbol_pack (symbols_sorted[i]); /* Aliases leave empty slots in symbols, so remove them. 
*/ { diff --git a/tests/diagnostics.at b/tests/diagnostics.at index bd01c15d..deac06ff 100644 --- a/tests/diagnostics.at +++ b/tests/diagnostics.at @@ -427,16 +427,16 @@ res: QUX baz bar: QUUX ]], [1], -[[input.y:11.6-9: error: symbol 'QUUX' is used, but is not defined as a token and has no rules - 11 | bar: QUUX - | ^~~~ -input.y:10.6-8: error: symbol 'QUX' is used, but is not defined as a token and has no rules +[[input.y:10.6-8: error: symbol 'QUX' is used, but is not defined as a token and has no rules 10 | res: QUX baz | ^~~ input.y:10.10-12: error: symbol 'baz' is used, but is not defined as a token and has no rules; did you mean 'bar'? 10 | res: QUX baz | ^~~ | bar +input.y:11.6-9: error: symbol 'QUUX' is used, but is not defined as a token and has no rules + 11 | bar: QUUX + | ^~~~ ]]) diff --git a/tests/input.at b/tests/input.at index e5d7666b..976e5395 100644 --- a/tests/input.at +++ b/tests/input.at @@ -822,15 +822,15 @@ exp: bar; ]]) AT_BISON_CHECK([-fcaret input.y], [1], [], -[[input.y:2.16-18: error: symbol 'bar' is used, but is not defined as a token and has no rules - 2 | %destructor {} bar - | ^~~ +[[input.y:1.13-15: warning: symbol 'foo' is used, but is not defined as a token and has no rules [-Wother] + 1 | %printer {} foo baz + | ^~~ input.y:1.17-19: warning: symbol 'baz' is used, but is not defined as a token and has no rules [-Wother] 1 | %printer {} foo baz | ^~~ -input.y:1.13-15: warning: symbol 'foo' is used, but is not defined as a token and has no rules [-Wother] - 1 | %printer {} foo baz - | ^~~ +input.y:2.16-18: error: symbol 'bar' is used, but is not defined as a token and has no rules + 2 | %destructor {} bar + | ^~~ input.y:3.13-15: warning: symbol 'qux' is used, but is not defined as a token and has no rules [-Wother] 3 | %type qux | ^~~ @@ -1061,9 +1061,9 @@ AT_BISON_CHECK([-fcaret input.y], [1], [], input.y:2.10-12: error: user token number 42 redeclaration for BAR 2 | BAR 42 "foo" | ^~~ -input.y:1.8-10: previous declaration for 
FOO +input.y:1.15-19: previous declaration for "foo" 1 | %token FOO 42 "foo" - | ^~~ + | ^~~~~ ]]) AT_CLEANUP