From 999277ddd83679cb6d4017ec4b61658283265ec7 Mon Sep 17 00:00:00 2001 From: Akim Demaille Date: Tue, 4 Dec 2018 06:53:36 +0100 Subject: [PATCH] skeletons: start some technical documentation * data/README: Convert to Markdown. Start documenting some of the macros used in all our skeletons. Simplify and fix the documentation of the macros in the skeletons. --- data/README | 118 ++++++++++++++++++++++++++++++++++++++++++------ data/bison.m4 | 39 +--------------- data/c++.m4 | 3 -- data/c.m4 | 13 +----- data/glr.c | 8 ++-- data/java.m4 | 28 +++--------- data/lalr1.cc | 12 ++--- data/variant.hh | 1 + data/yacc.c | 9 ++-- 9 files changed, 125 insertions(+), 106 deletions(-) diff --git a/data/README b/data/README index 798750f7..0575e2fc 100644 --- a/data/README +++ b/data/README @@ -1,8 +1,9 @@ This directory contains data needed by Bison. -* Skeletons -Bison skeletons: the general shapes of the different parser kinds, -that are specialized for specific grammars by the bison program. +# Directory content +## Skeletons +Bison skeletons: the general shapes of the different parser kinds, that are +specialized for specific grammars by the bison program. Currently, the supported skeletons are: @@ -22,19 +23,18 @@ Currently, the supported skeletons are: - glr.cc A Generalized LR C++ parser. Actually a C++ wrapper around glr.c. -These skeletons are the only ones supported by the Bison team. -Because the interface between skeletons and the bison program is not -finished, *we are not bound to it*. In particular, Bison is not -mature enough for us to consider that "foreign skeletons" are -supported. +These skeletons are the only ones supported by the Bison team. Because the +interface between skeletons and the bison program is not finished, *we are +not bound to it*. In particular, Bison is not mature enough for us to +consider that "foreign skeletons" are supported. 
-* m4sugar -This directory contains M4sugar, sort of an extended library for M4, -which is used by Bison to instantiate the skeletons. +## m4sugar +This directory contains M4sugar, sort of an extended library for M4, which +is used by Bison to instantiate the skeletons. -* xslt -This directory contains XSLT programs that transform Bison's XML output -into various formats. +## xslt +This directory contains XSLT programs that transform Bison's XML output into +various formats. - bison.xsl A library of routines used by the other XSLT programs. @@ -48,10 +48,98 @@ into various formats. - xml2xhtml.xsl Conversion into XHTML. +# Implementation note about the skeletons + +"Skeleton" in Bison parlance means "backend": skeletons are fed by the bison +executable with LR tables, facts about the symbols, etc. and they generate +the output (say parser.cc, parser.hh, location.hh, etc.). They are only in +charge of generating the parser and its auxiliary files; they do not +generate the XML output, the parser.output reports, nor the graphical +rendering. + +The bits of information passing from bison to the backend are named +"muscles". Muscles are passed to M4 via its standard input: it's a set of +m4 definitions. To see them, use `--trace=muscles`. + +Except for muscles, whose names are generated by bison, the skeletons have +no constraint at all on the macro names: there is no technical/theoretical +limitation, as long as you generate the output, you can do what you want. +However, of course, that would be a bad idea if, say, the C and C++ +skeletons used different approaches and had completely different +implementations. That would be a maintenance nightmare. + +Below, we document some of the macros that we use in several of the +skeletons. If you are to write a new skeleton, please, implement them for +your language. Overall, be sure to follow the same patterns as the existing +skeletons. 
+ +## Symbols + +In order to unify the handling of the various aspects of symbols +(tag, type_name, whether terminal, etc.), bison.exe defines one +macro per (token, field), where field can be has_id, id, etc.: see +src/output.c:prepare_symbols_definitions(). + +The various FIELDS are: + +- has_id: 0 or 1. + Whether the symbol has an id. +- id: string + If has_id, the id. Guaranteed to be usable as a C identifier. + Prefixed by api.token.prefix if defined. +- tag: string. + A representation of the symbol. Can be 'foo', 'foo.id', '"foo"' etc. +- user_number: integer + The assigned (external) number as used by yylex. +- is_token: 0 or 1 + Whether this is a terminal symbol. +- number: integer + The internalized number (used after yytranslate). +- has_type: 0, 1 + Whether it has a semantic value. +- type_tag: string + When api.value.type=union, the generated name for the union member. + yytype_INT etc. for symbols that has_id, otherwise yytype_1 etc. +- type + If it has a semantic value, its type tag, or, if variants are used, + its type. + In the case of api.value.type=union, type is the real type (e.g. int). +- has_printer: 0, 1 +- printer: string +- printer_file: string +- printer_line: integer + If the symbol has a printer, everything about it. +- has_destructor, destructor, destructor_file, destructor_line + Likewise. + +### b4_symbol_value(VAL, [SYMBOL-NUM], [TYPE-TAG]) +Expansion of $$, $1, $<TYPE-TAG>3, etc. + +The semantic value from a given VAL. +- `VAL`: some semantic value storage (typically a union). e.g., `yylval` +- `SYMBOL-NUM`: the symbol number from which we extract the type tag. +- `TYPE-TAG`: the `<TYPE-TAG>` forced by the user. + +The result can be used safely, it is put in parens to avoid nasty precedence +issues. + +### b4_lhs_value(SYMBOL-NUM, [TYPE]) +Expansion of `$$` or `$<TYPE>$`, for symbol `SYMBOL-NUM`. + +### b4_rhs_data(RULE-LENGTH, POS) +The data corresponding to the symbol `#POS`, where the current rule has +`RULE-LENGTH` symbols on RHS. 
+ +### b4_rhs_value(RULE-LENGTH, POS, SYMBOL-NUM, [TYPE]) +Expansion of `$POS`, where the current rule has `RULE-LENGTH` symbols +on RHS. + ----- Local Variables: -mode: outline +mode: markdown +fill-column: 76 +ispell-dictionary: "american" End: Copyright (C) 2002, 2008-2015, 2018 Free Software Foundation, Inc. diff --git a/data/bison.m4 b/data/bison.m4 index 9b80f7d2..484bc813 100644 --- a/data/bison.m4 +++ b/data/bison.m4 @@ -378,44 +378,9 @@ b4_define_flag_if([yacc]) # Whether POSIX Yacc is emulated. ## Symbols. ## ## --------- ## -# In order to unify the handling of the various aspects of symbols -# (tag, type_name, whether terminal, etc.), bison.exe defines one -# macro per (token, field), where field can has_id, id, etc.: see -# src/output.c:prepare_symbols_definitions(). +# For a description of the Symbol handling, see README. # -# The various FIELDS are: -# -# - has_id: 0 or 1. -# Whether the symbol has an id. -# - id: string -# If has_id, the id. Guaranteed to be usable as a C identifier. -# Prefixed by api.token.prefix if defined. -# - tag: string. -# A representation of the symbol. Can be 'foo', 'foo.id', '"foo"' etc. -# - user_number: integer -# The assigned (external) number as used by yylex. -# - is_token: 0 or 1 -# Whether this is a terminal symbol. -# - number: integer -# The internalized number (used after yytranslate). -# - has_type: 0, 1 -# Whether has a semantic value. -# - type_tag: string -# When api.value.type=union, the generated name for the union member. -# yytype_INT etc. for symbols that has_id, otherwise yytype_1 etc. -# - type -# If it has a semantic value, its type tag, or, if variant are used, -# its type. -# In the case of api.value.type=union, type is the real type (e.g. int). -# - has_printer: 0, 1 -# - printer: string -# - printer_file: string -# - printer_line: integer -# If the symbol has a printer, everything about it. -# - has_destructor, destructor, destructor_file, destructor_line -# Likewise. 
-# -# The following macros provide access to these values. +# The following macros provide access to symbol related values. # _b4_symbol(NUM, FIELD) # ---------------------- diff --git a/data/c++.m4 b/data/c++.m4 index e3993edc..86e686a2 100644 --- a/data/c++.m4 +++ b/data/c++.m4 @@ -523,15 +523,12 @@ m4_define([b4_yytranslate_define], # b4_lhs_value([TYPE]) # -------------------- -# Expansion of $$. m4_define([b4_lhs_value], [b4_symbol_value([yyval], [$1])]) # b4_rhs_value(RULE-LENGTH, POS, [TYPE]) # -------------------------------------- -# Expansion of $POS, where the current rule has RULE-LENGTH -# symbols on RHS. # FIXME: Dead code. m4_define([b4_rhs_value], [b4_symbol_value([yysemantic_stack_@{($1) - ($2)@}], [$3])]) diff --git a/data/c.m4 b/data/c.m4 index 5e253d63..b51aeca5 100644 --- a/data/c.m4 +++ b/data/c.m4 @@ -375,18 +375,7 @@ m4_define([b4_token_enums_defines], # b4_symbol_value(VAL, [SYMBOL-NUM], [TYPE-TAG]) # ---------------------------------------------- -# Expansion of $$, $1, $<TYPE-TAG>3, etc. -# -# The semantic value from a given VAL. -# -# VAL: some semantic value storage (typically a union). -# e.g., yylval -# SYMBOL-NUM: the symbol number from which we extract the -# type tag. -# TYPE-TAG, the user forced the <TYPE-TAG>. -# -# The result can be used safely, it is put in parens to avoid nasty -# precedence issues. +# See README. m4_define([b4_symbol_value], [m4_ifval([$3], [($1.$3)], diff --git a/data/glr.c b/data/glr.c index f3e5fc34..de3fc90d 100644 --- a/data/glr.c +++ b/data/glr.c @@ -114,23 +114,21 @@ m4_define([b4_locuser_args], # b4_lhs_value(SYMBOL-NUM, [TYPE]) # -------------------------------- -# Expansion of $$ or $<TYPE>$, for symbol SYMBOL-NUM. +# See README. m4_define([b4_lhs_value], [b4_symbol_value([(*yyvalp)], [$1], [$2])]) # b4_rhs_data(RULE-LENGTH, POS) # ----------------------------- -# Expand to the semantic stack place that contains value and location -# of symbol number POS in a rule of length RULE-LENGTH. +# See README. 
m4_define([b4_rhs_data], [((yyGLRStackItem const *)yyvsp)@{YYFILL (b4_subtract([$2], [$1]))@}.yystate]) # b4_rhs_value(RULE-LENGTH, POS, SYMBOL-NUM, [TYPE]) # -------------------------------------------------- -# Expansion of $POS, where the current rule has RULE-LENGTH -# symbols on RHS. +# See README. m4_define([b4_rhs_value], [b4_symbol_value([b4_rhs_data([$1], [$2]).yysemantics.yysval], [$3], [$4])]) diff --git a/data/java.m4 b/data/java.m4 index 5290d590..d2d7c42b 100644 --- a/data/java.m4 +++ b/data/java.m4 @@ -221,18 +221,7 @@ m4_define([b4_position_type], [b4_percent_define_get([[api.position.type]])]) # b4_symbol_value(VAL, [SYMBOL-NUM], [TYPE-TAG]) # ---------------------------------------------- -# Expansion of $$, $1, $<TYPE-TAG>3, etc. -# -# The semantic value from a given VAL. -# -# VAL: some semantic value storage (typically a union). -# e.g., yylval -# SYMBOL-NUM: the symbol number from which we extract the -# type tag. -# TYPE-TAG, the user forced the <TYPE-TAG>. -# -# The result can be used safely, it is put in parens to avoid nasty -# precedence issues. +# See README. m4_define([b4_symbol_value], [m4_ifval([$3], [(($3)($1))], @@ -243,26 +232,21 @@ m4_define([b4_symbol_value], [$1])])]) -# b4_lhs_value([TYPE]) -# -------------------- -# Expansion of $$. +# b4_lhs_value([SYMBOL-NUM], [TYPE]) +# ---------------------------------- +# See README. m4_define([b4_lhs_value], [yyval]) # b4_rhs_data(RULE-LENGTH, POS) # ----------------------------- -# Expansion of $POS, where the current rule has RULE-LENGTH -# symbols on RHS. -# -# In this simple implementation, %token and %type have class names -# between the angle brackets. +# See README. m4_define([b4_rhs_data], [yystack.valueAt ($1-($2))]) # b4_rhs_value(RULE-LENGTH, POS, SYMBOL-NUM, [TYPE]) # -------------------------------------------------- -# Expansion of $POS, where the current rule has RULE-LENGTH -# symbols on RHS. +# See README. 
# # In this simple implementation, %token and %type have class names # between the angle brackets. diff --git a/data/lalr1.cc b/data/lalr1.cc index e97b9b41..26d9e286 100644 --- a/data/lalr1.cc +++ b/data/lalr1.cc @@ -50,9 +50,9 @@ m4_copy([b4_symbol_value], [b4_symbol_value_template]) m4_append([b4_value_type_setup_union], [m4_copy_force([b4_symbol_value_union], [b4_symbol_value_template])]) -# b4_lhs_value(SYMBOL-NUM, SYMBOL-NUM, [TYPE]) -# -------------------------------------------- -# Expansion of $$ or $<TYPE>$, for symbol SYMBOL-NUM. +# b4_lhs_value(SYMBOL-NUM, [TYPE]) +# -------------------------------- +# See README. m4_define([b4_lhs_value], [b4_symbol_value([yylhs.value], [$1], [$2])]) @@ -66,8 +66,7 @@ m4_define([b4_lhs_location], # b4_rhs_data(RULE-LENGTH, POS) # ----------------------------- -# Return the data corresponding to the symbol #POS, where the current -# rule has RULE-LENGTH symbols on RHS. +# See README. m4_define([b4_rhs_data], [yystack_@{b4_subtract($@)@}]) @@ -82,8 +81,7 @@ m4_define([b4_rhs_state], # b4_rhs_value(RULE-LENGTH, POS, SYMBOL-NUM, [TYPE]) # -------------------------------------------------- -# Expansion of $POS, where the current rule has RULE-LENGTH -# symbols on RHS. +# See README. m4_define([_b4_rhs_value], [b4_symbol_value([b4_rhs_data([$1], [$2]).value], [$3], [$4])]) diff --git a/data/variant.hh b/data/variant.hh index eea2c765..19342f9f 100644 --- a/data/variant.hh +++ b/data/variant.hh @@ -306,6 +306,7 @@ m4_define([b4_value_type_declare], # b4_symbol_value(VAL, SYMBOL-NUM, [TYPE]) # ---------------------------------------- +# See README. m4_define([b4_symbol_value], [m4_ifval([$3], [$1.as< $3 > ()], diff --git a/data/yacc.c b/data/yacc.c index e74ba71a..bee27d92 100644 --- a/data/yacc.c +++ b/data/yacc.c @@ -130,15 +130,14 @@ m4_define([b4_int_type], # b4_lhs_value(SYMBOL-NUM, [TYPE]) # -------------------------------- -# Expansion of $$ or $<TYPE>$, for symbol SYMBOL-NUM. +# See README. 
m4_define([b4_lhs_value], [b4_symbol_value(yyval, [$1], [$2])]) -# b4_rhs_value(RULE-LENGTH, POS, SYMBOL-NUM, [TYPE]) -# -------------------------------------------------- -# Expansion of $POS, where the current rule has RULE-LENGTH -# symbols on RHS. +# b4_rhs_value(RULE-LENGTH, POS, [SYMBOL-NUM], [TYPE]) +# ---------------------------------------------------- +# See README. m4_define([b4_rhs_value], [b4_symbol_value([yyvsp@{b4_subtract([$2], [$1])@}], [$3], [$4])])