From 3bebedf1f810c3d6149b59b75923bf4ee2442696 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Antonio=20Ni=C3=B1o=20D=C3=ADaz?= Date: Thu, 22 Feb 2018 22:11:30 +0000 Subject: [PATCH] Handle newlines and comments correctly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Newlines have to be handled before comments or comments won't be able to handle line endings that don't include at least one LF character. Also, document an obscure comment syntax: Anything that follows a '*' placed at the start of a line is also a comment until the end of the line. Signed-off-by: Antonio Niño Díaz --- docs/rgbasm.5.html | 6 ++++++ src/asm/lexer.c | 40 +++++++++++++++++++++++++++++++++------- src/asm/rgbasm.5 | 6 ++++++ 3 files changed, 45 insertions(+), 7 deletions(-) diff --git a/docs/rgbasm.5.html b/docs/rgbasm.5.html index a9194599..88776565 100644 --- a/docs/rgbasm.5.html +++ b/docs/rgbasm.5.html @@ -42,6 +42,12 @@ Example:
All pseudo‐ops, mnemonics and registers (reserved keywords) are case‐insensitive and all labels are case‐sensitive. +
+There are two syntaxes for comments. In both cases, a comment ends at the end of + the line. The most common one is: anything that follows a semicolon + ";" (that isn't inside a string) is a comment. There is another + format: anything that follows a "*" that is placed right at the + start of a line is a comment.

Sections

Before you can start writing code, you must define a section. This tells the assembler what kind of information follows and, if it is code, where to put diff --git a/src/asm/lexer.c b/src/asm/lexer.c index 1462383a..0c728784 100644 --- a/src/asm/lexer.c +++ b/src/asm/lexer.c @@ -159,6 +159,8 @@ YY_BUFFER_STATE yy_create_buffer(FILE *f) pBuffer->pBuffer[size + 1] = 0; pBuffer->nBufferSize = size + 1; + /* Convert all line endings to LF and spaces */ + char *mem = pBuffer->pBuffer; uint32_t instring = 0; @@ -171,20 +173,44 @@ YY_BUFFER_STATE yy_create_buffer(FILE *f) } else if (instring) { mem += 1; } else { - if ((mem[0] == 10 && mem[1] == 13) - || (mem[0] == 13 && mem[1] == 10)) { + /* LF CR and CR LF */ + if (((mem[0] == 10) && (mem[1] == 13)) + || ((mem[0] == 13) && (mem[1] == 10))) { mem[0] = ' '; mem[1] = '\n'; mem += 2; - } else if (mem[0] == 10 || mem[0] == 13) { + /* LF and CR */ + } else if ((mem[0] == 10) || (mem[0] == 13)) { mem[0] = '\n'; mem += 1; - } else if (mem[0] == '\n' && mem[1] == '*') { + } else { mem += 1; - while (!(*mem == '\n' || *mem == '\0')) + } + } + } + + /* Remove comments */ + + mem = pBuffer->pBuffer; + instring = 0; + + while (*mem) { + if (*mem == '\"') + instring = 1 - instring; + + if ((mem[0] == '\\') && (mem[1] == '\"' || mem[1] == '\\')) { + mem += 2; + } else if (instring) { + mem += 1; + } else { + /* Comments that start with ; anywhere in a line */ + if (*mem == ';') { + while (!((*mem == '\n') || (*mem == '\0'))) *mem++ = ' '; - } else if (*mem == ';') { - while (!(*mem == '\n' || *mem == '\0')) + /* Comments that start with * at the start of a line */ + } else if ((mem[0] == '\n') && (mem[1] == '*')) { + mem += 1; + while (!((*mem == '\n') || (*mem == '\0'))) *mem++ = ' '; } else { mem += 1; diff --git a/src/asm/rgbasm.5 b/src/asm/rgbasm.5 index b3a00fd9..64a028fa 100644 --- a/src/asm/rgbasm.5 +++ b/src/asm/rgbasm.5 @@ -30,6 +30,12 @@ Example: .Pp All pseudo‐ops, mnemonics and registers (reserved keywords) are case‐insensitive and all labels are case‐sensitive. +.Pp +There are two syntaxes for comments. In both cases, a comment ends at the end of +the line. The most common one is: anything that follows a semicolon +\[dq]\&;\[dq] (that isn't inside a string) is a comment. There is another +format: anything that follows a \[dq]*\[dq] that is placed right at the start of +a line is a comment. .Ss Sections Before you can start writing code, you must define a section. This tells the assembler what kind of information follows and, if it is code,