From 647453a61474a1ebe9695568e59931fbaf6a338e Mon Sep 17 00:00:00 2001
From: Akim Demaille
Date: Sat, 19 Sep 2020 17:13:14 +0200
Subject: [PATCH] examples: add a demonstration of GLR parsers in C

Based on the test case 668 (cxx-type.at:437) "GLR: Merge conflicting
parses, pure, locations".

* examples/c/glr/Makefile, examples/c/glr/README.md,
* examples/c/glr/c++-types.test, examples/c/glr/c++-types.y,
* examples/c/glr/local.mk: New.
---
 examples/c/glr/Makefile       |  28 ++++
 examples/c/glr/README.md      |  24 +++
 examples/c/glr/c++-types.test |  47 ++++++
 examples/c/glr/c++-types.y    | 298 ++++++++++++++++++++++++++++++++++
 examples/c/glr/local.mk       |  34 ++++
 examples/c/local.mk           |   1 +
 6 files changed, 432 insertions(+)
 create mode 100644 examples/c/glr/Makefile
 create mode 100644 examples/c/glr/README.md
 create mode 100644 examples/c/glr/c++-types.test
 create mode 100644 examples/c/glr/c++-types.y
 create mode 100644 examples/c/glr/local.mk

diff --git a/examples/c/glr/Makefile b/examples/c/glr/Makefile
new file mode 100644
index 00000000..6221ca9e
--- /dev/null
+++ b/examples/c/glr/Makefile
@@ -0,0 +1,28 @@
+# This Makefile is designed to be simple and readable. It does not
+# aim at portability. It requires GNU Make.
+
+BASE = c++-types
+BISON = bison
+XSLTPROC = xsltproc
+
+all: $(BASE)
+
+%.c %.h %.xml %.gv: %.y
+	$(BISON) $(BISONFLAGS) --defines --xml --graph=$*.gv -o $*.c $<
+
+$(BASE): $(BASE).o
+	$(CC) $(CFLAGS) -o $@ $^
+
+run: $(BASE)
+	@echo "Type C++ declarations or expressions. Quit with ctrl-d."
+	./$<
+
+html: $(BASE).html
+%.html: %.xml
+	$(XSLTPROC) $(XSLTPROCFLAGS) -o $@ $$($(BISON) --print-datadir)/xslt/xml2xhtml.xsl $<
+
+CLEANFILES = \
+  $(BASE) *.o $(BASE).[ch] $(BASE).output $(BASE).xml $(BASE).html $(BASE).gv
+
+clean:
+	rm -f $(CLEANFILES)
diff --git a/examples/c/glr/README.md b/examples/c/glr/README.md
new file mode 100644
index 00000000..60de8d65
--- /dev/null
+++ b/examples/c/glr/README.md
@@ -0,0 +1,24 @@
+# glr
+
+This example demonstrates the use of GLR parsers to handle (local)
+ambiguities in the C++ language. See the node "Merging GLR Parses" in
+Bison's documentation.
+
+
diff --git a/examples/c/glr/c++-types.test b/examples/c/glr/c++-types.test
new file mode 100644
index 00000000..0a1cdb26
--- /dev/null
+++ b/examples/c/glr/c++-types.test
@@ -0,0 +1,47 @@
+#! /bin/sh
+
+# Copyright (C) 2020 Free Software Foundation, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+cat >input <<EOF
+z + q;
+
+T x;
+
+T x = y;
+
+x = y;
+
+T (x) + y;
+
+T (x);
+
+T (y) = z + q;
+
+T (y y) = z + q;
+
+z + q;
+EOF
+run 0 "\
+1.0-1.5: +(z,q)
+3.0-3.3: (T,x)
+5.0-5.7: (T,x,y)
+7.0-7.5: =(x,y)
+9.0-9.9: +((x,T),y)
+11.0-11.5: ((T,x),(x,T))
+13.0-13.13: ((T,y,+(z,q)),=((y,T),+(z,q)))
+15.0-15.15: 
+17.0-17.5: +(z,q)
+err: 15.5: syntax error, unexpected identifier, expecting '=' or '+' or ')'"
diff --git a/examples/c/glr/c++-types.y b/examples/c/glr/c++-types.y
new file mode 100644
index 00000000..a6118c0b
--- /dev/null
+++ b/examples/c/glr/c++-types.y
@@ -0,0 +1,298 @@
+/* Simplified C++ Type and Expression Grammar.
*/
+
+/* Reentrant parser: yylex and yyerror receive the value and location
+   through pointers instead of globals.  */
+%define api.pure
+%header
+%define api.header.include {"c++-types.h"}
+%locations
+%debug
+
+/* Nice error messages with details. */
+%define parse.error detailed
+
+%code requires
+{
+  /* A node of the tree built by the parser.  The three views share
+     the same two leading ints, so nodeInfo can be used to inspect a
+     node before knowing whether it is a term or a nterm.  */
+  union Node {
+    struct {
+      int isNterm;
+      int parents;   /* Number of nterm nodes holding this node.  */
+    } nodeInfo;
+    struct {
+      int isNterm; /* 1 */
+      int parents;
+      char const *form;          /* printf format applied to the children.  */
+      union Node *children[3];   /* Unused slots are NULL.  */
+    } nterm;
+    struct {
+      int isNterm; /* 0 */
+      int parents;
+      char *text;                /* Heap-allocated, freed with the node.  */
+    } term;
+  };
+  typedef union Node Node;
+}
+
+%define api.value.type {Node *}
+
+%code
+{
+
+/* NOTE(review): this block originally held six #include lines whose
+   header names were lost.  The five below cover every library call
+   made in this file; confirm the sixth against upstream.  */
+#include <assert.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+  static Node *new_nterm (char const *, Node *, Node *, Node *);
+  static Node *new_term (char *);
+  static void free_node (Node *);
+  static char *node_to_string (Node *);
+  static YYSTYPE stmtMerge (YYSTYPE x0, YYSTYPE x1);
+
+  static int location_print (FILE *yyo, YYLTYPE const * const yylocp);
+  static void yyerror (YYLTYPE const * const llocp, const char *msg);
+  static int yylex (YYSTYPE *lvalp, YYLTYPE *llocp);
+}
+
+%expect-rr 1
+
+%token
+  TYPENAME "typename"
+  ID "identifier"
+
+%right '='
+%left '+'
+
+%glr-parser
+
+/* Values discarded by the parser (error recovery, abandoned GLR
+   stacks) are released through the reference-counting free_node.  */
+%destructor { free_node ($$); } stmt expr decl declarator TYPENAME ID
+
+%%
+
+prog : %empty
+     | prog stmt   {
+                     /* Print "LINE.COL-LINE.COL: TREE" for each statement.  */
+                     char *output = node_to_string ($2);
+                     printf ("%d.%d-%d.%d: %s\n",
+                             @2.first_line, @2.first_column,
+                             @2.last_line, @2.last_column,
+                             output);
+                     free (output);
+                     free_node ($2);
+                   }
+     ;
+
+/* A statement that parses both as an expression and as a declaration
+   keeps both readings: stmtMerge combines the two semantic values.  */
+stmt : expr ';'  %merge <stmtMerge>     { $$ = $1; }
+     | decl      %merge <stmtMerge>
+     | error ';'        { $$ = new_nterm ("", NULL, NULL, NULL); }
+     | '@'              { $$ = $1; YYACCEPT; }
+     ;
+
+expr : ID
+     | TYPENAME '(' expr ')'
+                        { $$ = new_nterm ("(%s,%s)", $3, $1, NULL); }
+     | expr '+' expr    { $$ = new_nterm ("+(%s,%s)", $1, $3, NULL); }
+     | expr '=' expr    { $$ = new_nterm ("=(%s,%s)", $1, $3, NULL); }
+     ;
+
+decl : TYPENAME declarator ';'
+                        { $$ = new_nterm ("(%s,%s)", $1, $2, NULL); }
+     | TYPENAME declarator '=' expr ';'
+                        { $$ = new_nterm ("(%s,%s,%s)", $1,
+                                          $2, $4); }
+     ;
+
+declarator
+     : ID
+     | '(' declarator
')' { $$ = $2; }
+     ;
+
+%%
+
+/* Allocate SIZE bytes, or report the failure and exit: none of the
+   callers below can do anything useful without the memory.  */
+static void *
+checked_malloc (size_t size)
+{
+  void *res = malloc (size);
+  if (!res)
+    {
+      fprintf (stderr, "memory exhausted\n");
+      exit (EXIT_FAILURE);
+    }
+  return res;
+}
+
+/* Parse standard input and return yyparse's status.  Passing -p as
+   first argument enables the parser's traces.  */
+int
+main (int argc, char **argv)
+{
+  if (1 < argc && strcmp (argv[1], "-p") == 0)
+    yydebug = 1;
+  return yyparse ();
+}
+
+
+/* Print *YYLOCP on YYO as "LINE.COL", followed by "-LINE.COL" or
+   "-COL" when the end lies beyond the start.  Negative fields are
+   not printed.  Return the sum of the fprintf results.  */
+
+static int
+location_print (FILE *yyo, YYLTYPE const * const yylocp)
+{
+  int res = 0;
+  /* The end column that gets printed is one less than last_column.  */
+  int end_col = 0 != yylocp->last_column ? yylocp->last_column - 1 : 0;
+  if (0 <= yylocp->first_line)
+    {
+      res += fprintf (yyo, "%d", yylocp->first_line);
+      if (0 <= yylocp->first_column)
+        res += fprintf (yyo, ".%d", yylocp->first_column);
+    }
+  if (0 <= yylocp->last_line)
+    {
+      if (yylocp->first_line < yylocp->last_line)
+        {
+          res += fprintf (yyo, "-%d", yylocp->last_line);
+          if (0 <= end_col)
+            res += fprintf (yyo, ".%d", end_col);
+        }
+      else if (0 <= end_col && yylocp->first_column < end_col)
+        res += fprintf (yyo, "-%d", end_col);
+    }
+  return res;
+}
+
+/* Report MSG on stderr, prefixed with the location *LLOCP.  */
+static void
+yyerror (YYLTYPE const * const llocp, const char *msg)
+{
+  location_print (stderr, llocp);
+  fprintf (stderr, ": %s\n", msg);
+}
+
+/* Read the next token from stdin and return its kind, or 0 at end of
+   input.
+
+   A letter followed by letters, digits and underscores is a TYPENAME
+   when it starts with an uppercase letter and an ID otherwise; *LVALP
+   is then a new term node owning a copy of the text.  Any other
+   character that is not a blank is returned as itself, with a NULL
+   value.  *LLOCP is set to the token's line and to the columns of its
+   first and last characters; lines count from 1 and columns from 0.  */
+static int
+yylex (YYSTYPE *lvalp, YYLTYPE *llocp)
+{
+  static int lineNum = 1;
+  static int colNum = 0;
+
+  while (1)
+    {
+      int c;
+      assert (!feof (stdin));
+      c = getchar ();
+      switch (c)
+        {
+        case EOF:
+          return 0;
+        case '\t':
+          /* Move to the next multiple of 8.  Rounding colNum + 7 down
+             would leave the column unchanged on a tab stop.  */
+          colNum = (colNum + 8) & ~7;
+          break;
+        case ' ': case '\f':
+          colNum += 1;
+          break;
+        case '\n':
+          lineNum += 1;
+          colNum = 0;
+          break;
+        default:
+          {
+            int tok;
+            llocp->first_line = llocp->last_line = lineNum;
+            llocp->first_column = colNum;
+            if (isalpha (c))
+              {
+                char buffer[256];
+                size_t len = 0;
+                char *text;
+
+                do
+                  {
+                    /* Checked unconditionally: an assert would vanish
+                       under NDEBUG and let the buffer overflow.  */
+                    if (len == sizeof buffer - 1)
+                      {
+                        fprintf (stderr, "%d.%d: identifier too long\n",
+                                 lineNum, llocp->first_column);
+                        exit (EXIT_FAILURE);
+                      }
+                    buffer[len++] = (char) c;
+                    colNum += 1;
+                    c = getchar ();
+                  }
+                while (isalnum (c) || c == '_');
+
+                /* Give back the character that ended the identifier.  */
+                ungetc (c, stdin);
+                buffer[len] = '\0';
+                tok = isupper ((unsigned char) buffer[0]) ? TYPENAME : ID;
+                text = checked_malloc (len + 1);
+                memcpy (text, buffer, len + 1);
+                *lvalp = new_term (text);
+              }
+            else
+              {
+                colNum += 1;
+                tok = c;
+                *lvalp = NULL;
+              }
+            llocp->last_column = colNum-1;
+            return tok;
+          }
+        }
+    }
+}
+
+/* Return a new nterm node printed with format FORM, whose children
+   are CHILD0, CHILD1 and CHILD2 (NULL for unused slots).  Each
+   non-NULL child gains one parent.  */
+static Node *
+new_nterm (char const *form, Node *child0, Node *child1, Node *child2)
+{
+  Node *res = checked_malloc (sizeof *res);
+  res->nterm.isNterm = 1;
+  res->nterm.parents = 0;
+  res->nterm.form = form;
+  res->nterm.children[0] = child0;
+  if (child0)
+    child0->nodeInfo.parents += 1;
+  res->nterm.children[1] = child1;
+  if (child1)
+    child1->nodeInfo.parents += 1;
+  res->nterm.children[2] = child2;
+  if (child2)
+    child2->nodeInfo.parents += 1;
+  return res;
+}
+
+/* Return a new term node for TEXT.  The node takes ownership of TEXT,
+   which free_node later passes to free.  */
+static Node *
+new_term (char *text)
+{
+  Node *res = checked_malloc (sizeof *res);
+  res->term.isNterm = 0;
+  res->term.parents = 0;
+  res->term.text = text;
+  return res;
+}
+
+/* Drop one reference to NODE, which may be NULL.  The node is freed,
+   and its children released in turn, once no parent is left.  */
+static void
+free_node (Node *node)
+{
+  if (!node)
+    return;
+  node->nodeInfo.parents -= 1;
+  /* Free only if 0 (last parent) or -1 (no parents). */
+  if (node->nodeInfo.parents > 0)
+    return;
+  if (node->nodeInfo.isNterm == 1)
+    {
+      free_node (node->nterm.children[0]);
+      free_node (node->nterm.children[1]);
+      free_node (node->nterm.children[2]);
+    }
+  else
+    free (node->term.text);
+  free (node);
+}
+
+/* Return NODE rendered as a newly allocated string, to be freed by
+   the caller.  A NULL node gives the empty string, a term gives its
+   text, and a nterm gives its format applied to its children.  */
+static char *
+node_to_string (Node *node)
+{
+  char *res;
+  if (!node)
+    {
+      res = checked_malloc (1);
+      res[0] = 0;
+    }
+  else if (node->nodeInfo.isNterm == 1)
+    {
+      char *child0 = node_to_string (node->nterm.children[0]);
+      char *child1 = node_to_string (node->nterm.children[1]);
+      char *child2 = node_to_string (node->nterm.children[2]);
+      /* Each "%s" is replaced by one child, so the format's own
+         length plus the children's is always enough.  */
+      size_t size = (strlen (node->nterm.form) + strlen (child0)
+                     + strlen (child1) + strlen (child2) + 1);
+      res = checked_malloc (size);
+      snprintf (res, size, node->nterm.form, child0, child1, child2);
+      free (child2);
+      free (child1);
+      free (child0);
+    }
+  else
+    {
+      size_t size = strlen (node->term.text) + 1;
+      res = checked_malloc (size);
+      memcpy (res, node->term.text, size);
+    }
+  return res;
+}
+
+
+/* Combine X0 and X1, the values of two parses of the same statement,
+   into a single node having both as children.  */
+static YYSTYPE
+stmtMerge (YYSTYPE x0, YYSTYPE x1)
+{
+  return new_nterm ("(%s,%s)", x0, x1, NULL);
+}
+
diff --git a/examples/c/glr/local.mk
b/examples/c/glr/local.mk
new file mode 100644
index 00000000..ad68a69b
--- /dev/null
+++ b/examples/c/glr/local.mk
@@ -0,0 +1,34 @@
+## Copyright (C) 2020 Free Software Foundation, Inc.
+##
+## This program is free software: you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation, either version 3 of the License, or
+## (at your option) any later version.
+##
+## This program is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+## GNU General Public License for more details.
+##
+## You should have received a copy of the GNU General Public License
+## along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+glrdir = $(docdir)/%D%
+
+## ----------- ##
+## c++-types. ##
+## ----------- ##
+
+check_PROGRAMS += %D%/c++-types
+TESTS += %D%/c++-types.test
+EXTRA_DIST += %D%/c++-types.test
+nodist_%C%_c___types_SOURCES = %D%/c++-types.y
+%D%/c++-types.c: $(dependencies)
+
+# Don't use gnulib's system headers.
+%C%_c___types_CPPFLAGS = -I$(top_srcdir)/%D% -I$(top_builddir)/%D%
+%C%_c___types_CFLAGS = $(TEST_CFLAGS)
+
+dist_glr_DATA = %D%/c++-types.y %D%/Makefile %D%/README.md
+CLEANFILES += %D%/c++-types.c %D%/c++-types.h %D%/c++-types.output
+CLEANDIRS += %D%/*.dSYM
diff --git a/examples/c/local.mk b/examples/c/local.mk
index 860d6994..e0851eb8 100644
--- a/examples/c/local.mk
+++ b/examples/c/local.mk
@@ -18,6 +18,7 @@ dist_c_DATA = %D%/README.md
 
 include %D%/bistromathic/local.mk
 include %D%/calc/local.mk
+include %D%/glr/local.mk
 include %D%/lexcalc/local.mk
 include %D%/mfcalc/local.mk
 include %D%/pushcalc/local.mk