ossp-pkg/cfg/cfg_syn_scan.l
%{
/*
** OSSP cfg - Configuration Parsing
** Copyright (c) 2002-2006 Ralf S. Engelschall <rse@engelschall.com>
** Copyright (c) 2002-2006 The OSSP Project (http://www.ossp.org/)
**
** This file is part of OSSP cfg, a configuration parsing
** library which can be found at http://www.ossp.org/pkg/lib/cfg/.
**
** Permission to use, copy, modify, and distribute this software for
** any purpose with or without fee is hereby granted, provided that
** the above copyright notice and this permission notice appear in all
** copies.
**
** THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
** WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
** MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
** IN NO EVENT SHALL THE AUTHORS AND COPYRIGHT HOLDERS AND THEIR
** CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
** SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
** LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
** USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
** ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
** OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
** OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
** SUCH DAMAGE.
**
** cfg_syn_scan.l: regular grammar specification for GNU Flex
**
** ATTENTION: This requires GNU Flex 2.5.10 or newer!
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "cfg.h"
#include "cfg_global.h"
#include "cfg_buf.h"
#include "cfg_syn.h"
#include "cfg_syn_parse.h"
/* how to find our own context: the scanner's "extra" pointer is read back
   and treated as our cfg_syn_ctx_t */
#define CTX ((cfg_syn_ctx_t *)yyget_extra(yyscanner))
/* provide own input handling: drop any previous YY_INPUT and route all
   scanner reads through yyinput() on our context, storing its return
   value in "result" */
#undef YY_INPUT
#define YY_INPUT(buf,result,max_size) (result = yyinput(CTX, buf, max_size))
static int yyinput(cfg_syn_ctx_t *ctx, char *buf, int max_size);
/* location tracking on yylloc's first/last pair:
   YY_USER_INIT zeroes both fields */
#define YY_USER_INIT \
yylloc->first = 0; \
yylloc->last = 0;
/* YY_USER_ACTION moves "first" up to the previous "last" and then
   advances "last" by the length of the current match (yyleng) */
#define YY_USER_ACTION \
yylloc->first = yylloc->last; \
yylloc->last += yyleng;
/* YY_USER_ACTION_ROLLBACK resets "last" back to "first", i.e. it takes
   back the advance made by YY_USER_ACTION for the current match */
#define YY_USER_ACTION_ROLLBACK \
yylloc->last = yylloc->first
/* helpers defined in the user code section of this file */
static char closing_brace(char open);
static int hex_nibble(const char hex);
static int hex_sequence(cfg_syn_ctx_t *ctx, const char *in_ptr, size_t in_len);
%}
/* scanner options */
%pointer
%option stack
%option reentrant
%option bison-bridge
%option bison-locations
%option never-interactive
%option noyywrap
%option nounput
%option noyy_top_state
%option nounistd
/* scanner states (one start condition per multi-rule construct) */
/* SS_DQ: inside a double-quoted word ("...") */
%x SS_DQ
/* SS_SQ: inside a single-quoted word ('...') */
%x SS_SQ
/* SS_FQ: inside a flexible-quoted word (q<delimiter>...<delimiter>) */
%x SS_FQ
/* SS_PT: inside a plain text word */
%x SS_PT
/* SS_CO_C: inside a C-style block comment */
%x SS_CO_C
/* the delimiting character for flexible quoting has to be a
special character c, i.e., one of !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ or
in C code: isprint(c) && !isspace(c) && !iscntrl(c) && !isalpha(c) && !isdigit(c) */
FQDEL [\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~]
FQDELN [^\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~]
%%
/* local variables: bookkeeping shared by the rule actions below */
int nCommentOpen = 0; /* nesting depth of open C-style block comments */
int nQuoteOpen = 0; /* open/close balance of delimiters inside a flexible-quoted word */
char cQuoteOpen = '\0'; /* opening delimiter of the current flexible-quoted word */
char cQuoteClose = '\0'; /* its closing counterpart, as returned by closing_brace() */
/* whitespaces: runs of blanks, tabs and newlines produce no token */
[ \t\n]+ {
/* no-op */
}
/* C-style block comment (nesting is allowed; nCommentOpen tracks the depth) */
"/*" {
/* outermost opener: start counting at depth 1 and enter the comment state */
nCommentOpen = 1;
BEGIN(SS_CO_C);
}
<SS_CO_C>"/*" {
/* nested opener: one more level has to be closed */
nCommentOpen++;
}
<SS_CO_C>"*/" {
/* closer: return to INITIAL only once every level has been closed */
nCommentOpen--;
if (nCommentOpen == 0)
BEGIN(INITIAL);
}
<SS_CO_C>(.|\n) {
/* no-op: comment text is consumed one character at a time */
}
<SS_CO_C><<EOF>> {
/* input ended while a comment was still open: report and stop scanning */
cfg_syn_error(CTX, CFG_ERR_SYN, yylloc, "unterminated C-style block comment");
yyterminate();
}
/* C++-style EOL comment: from the two slashes up to, not including, the newline */
"//"[^\n]* {
/* no-op */
}
/* Shell-style EOL comment: from the hash sign up to, not including, the newline */
"#"[^\n]* {
/* no-op */
}
/* double-quoted word ("..."): text is collected in CTX->buf with escape
   sequences decoded, and returned as one T_STRING at the closing quote */
\" {
/* opening quote: resize the collection buffer to 0 and enter SS_DQ */
cfg_buf_resize(CTX->buf, 0);
BEGIN(SS_DQ);
}
<SS_DQ>\" {
/* closing quote: pass the buffer content to the parser via yylval->cpString */
cfg_buf_content(CTX->buf, &yylval->cpString, NULL, NULL);
BEGIN(INITIAL);
return T_STRING;
}
<SS_DQ>\\\n[ \t]* {
/* no-op: backslash-newline and the blanks/tabs that follow it are dropped */
}
<SS_DQ>\\[0-7]{1,3} {
/* octal escape of one to three digits; yytext+1 skips the backslash */
unsigned int result;
(void)sscanf(yytext+1, "%o", &result);
/* three octal digits can exceed one octet (e.g. \777), which is rejected */
if (result > 0xff) {
cfg_syn_error(CTX, CFG_ERR_SYN, yylloc, "escape sequence out of bound");
yyterminate();
}
cfg_buf_append(CTX->buf, NULL, 0, (char)result);
}
<SS_DQ>\\x\{[0-9a-fA-F]+\} {
/* braced hex escape: yytext+3 skips backslash, 'x' and the opening brace;
   yyleng-3-1 additionally excludes the closing brace */
if (!hex_sequence(CTX, yytext+3, yyleng-3-1))
yyterminate();
}
<SS_DQ>\\x[0-9a-fA-F]{2} {
/* two-digit hex escape: yytext+2 skips backslash and 'x' */
if (!hex_sequence(CTX, yytext+2, 2))
yyterminate();
}
<SS_DQ>\\n { cfg_buf_append(CTX->buf, NULL, 0, '\n'); /* newline */ }
<SS_DQ>\\r { cfg_buf_append(CTX->buf, NULL, 0, '\r'); /* carriage return */ }
<SS_DQ>\\t { cfg_buf_append(CTX->buf, NULL, 0, '\t'); /* horizontal tab */ }
<SS_DQ>\\b { cfg_buf_append(CTX->buf, NULL, 0, '\b'); /* backspace */ }
<SS_DQ>\\f { cfg_buf_append(CTX->buf, NULL, 0, '\f'); /* form feed */ }
<SS_DQ>\\a { cfg_buf_append(CTX->buf, NULL, 0, '\007'); /* octal 007 */ }
<SS_DQ>\\e { cfg_buf_append(CTX->buf, NULL, 0, '\033'); /* octal 033 */ }
<SS_DQ>\\(.|\n) {
/* any other escaped character stands for itself (backslash is dropped) */
cfg_buf_append(CTX->buf, NULL, 0, yytext[1]);
}
<SS_DQ>[^\\\"]+ {
/* a run of characters that are neither backslash nor quote is taken verbatim */
cfg_buf_append(CTX->buf, yytext, yyleng, 0);
}
<SS_DQ>(.|\n) {
/* fallback for any single character no rule above matched */
cfg_buf_append(CTX->buf, NULL, 0, yytext[0]);
}
<SS_DQ><<EOF>> {
/* input ended before the closing quote: report and stop scanning */
cfg_syn_error(CTX, CFG_ERR_SYN, yylloc, "unterminated double-quoted string");
yyterminate();
}
/* single-quoted word ('...'): only backslash and the quote itself can be
   escaped; every other backslash sequence is kept literally */
\' {
/* opening quote: resize the collection buffer to 0 and enter SS_SQ */
cfg_buf_resize(CTX->buf, 0);
BEGIN(SS_SQ);
}
<SS_SQ>\' {
/* closing quote: pass the buffer content to the parser via yylval->cpString */
cfg_buf_content(CTX->buf, &yylval->cpString, NULL, NULL);
BEGIN(INITIAL);
return T_STRING;
}
<SS_SQ>\\\n[ \t]* {
/* no-op: backslash-newline and the blanks/tabs that follow it are dropped */
}
<SS_SQ>\\[\\\'] {
/* escaped backslash or quote: keep only the escaped character */
cfg_buf_append(CTX->buf, NULL, 0, yytext[1]);
}
<SS_SQ>\\[^\\\'] {
/* backslash before anything else: keep both characters unchanged */
cfg_buf_append(CTX->buf, yytext, yyleng, 0);
}
<SS_SQ>[^\\\']+ {
/* a run of characters that are neither backslash nor quote is taken verbatim */
cfg_buf_append(CTX->buf, yytext, yyleng, 0);
}
<SS_SQ>(.|\n) {
/* fallback for any single character no rule above matched */
cfg_buf_append(CTX->buf, NULL, 0, yytext[0]);
}
<SS_SQ><<EOF>> {
/* input ended before the closing quote: report and stop scanning */
cfg_syn_error(CTX, CFG_ERR_SYN, yylloc, "unterminated single-quoted string");
yyterminate();
}
/* flexible-quoted word (q(.)[^\1]\1): the character after "q" chooses the
   delimiter; closing_brace() supplies the closing counterpart */
"q"{FQDEL} {
/* start collecting: one delimiter is open, remember the open/close pair */
cfg_buf_resize(CTX->buf, 0);
nQuoteOpen = 1;
cQuoteOpen = yytext[1];
cQuoteClose = closing_brace(yytext[1]);
BEGIN(SS_FQ);
}
<SS_FQ>\\{FQDEL} {
/* an escaped active delimiter yields the bare delimiter; a backslash
   before any other special character is kept together with it */
if (yytext[1] == cQuoteOpen || yytext[1] == cQuoteClose)
cfg_buf_append(CTX->buf, NULL, 0, yytext[1]);
else
cfg_buf_append(CTX->buf, yytext, yyleng, 0);
}
<SS_FQ>\\\n[ \t]* {
/* no-op: backslash-newline and the blanks/tabs that follow it are dropped */
}
<SS_FQ>{FQDELN} {
/* a character that can never be a delimiter is taken verbatim */
cfg_buf_append(CTX->buf, yytext, yyleng, 0);
}
<SS_FQ>(.|\n) {
/* remaining characters: possible delimiters (and a lone backslash) */
if (yytext[0] == cQuoteOpen || yytext[0] == cQuoteClose) {
/* distinct open/close characters are balanced by counting up and down;
   identical ones simply toggle the counter between 1 and 0 */
if (cQuoteOpen != cQuoteClose)
nQuoteOpen += (yytext[0] == cQuoteOpen ? 1 : -1);
else
nQuoteOpen = ((nQuoteOpen + 1) % 2);
}
/* the word ends at the closing delimiter that brings the balance to zero;
   that delimiter itself is not appended */
if (yytext[0] == cQuoteClose && nQuoteOpen == 0) {
cfg_buf_content(CTX->buf, &yylval->cpString, NULL, NULL);
BEGIN(INITIAL);
return T_STRING;
}
else
cfg_buf_append(CTX->buf, NULL, 0, yytext[0]);
}
<SS_FQ><<EOF>> {
/* input ended before the closing delimiter: report and stop scanning */
cfg_syn_error(CTX, CFG_ERR_SYN, yylloc, "unterminated flexible-quoted string");
yyterminate();
}
/* special tokens: each of these single characters is returned to the
   parser as a token of its own */
";" { return T_SEP; }
"{" { return T_OPEN; }
"}" { return T_CLOSE; }
/* plain text word: an unquoted word, collected in CTX->buf while in SS_PT */
\\\n[ \t]* {
/* no-op: backslash-newline and the blanks/tabs that follow it are dropped */
}
(.|\n) {
/* any character no earlier INITIAL rule claimed starts a plain text word:
   begin a fresh buffer with it and switch to SS_PT */
cfg_buf_resize(CTX->buf, 0);
cfg_buf_append(CTX->buf, NULL, 0, yytext[0]);
BEGIN(SS_PT);
}
<SS_PT>\\\n[ \t]* {
/* no-op: a line continuation inside the word is dropped */
}
<SS_PT>[^ \t\n;{}\\"']+ {
/* a run of characters that cannot end the word (no blank, tab, newline,
   ';', '{', '}', backslash or quote) is appended verbatim */
cfg_buf_append(CTX->buf, yytext, yyleng, 0);
}
<SS_PT>(.|\n) {
    /* Any character the SS_PT run pattern refuses (blank, tab, newline,
       ';', '{', '}', backslash or a quote) ends the plain text word.
       The text collected so far becomes the value of the T_STRING token. */
    cfg_buf_content(CTX->buf, &yylval->cpString, NULL, NULL);
    /* The terminating character is not part of the word: push it back so
       that the INITIAL rules scan it as the start of the next token. */
    yyless(0);
    /* YY_USER_ACTION has already advanced yylloc->last by this character,
       and it will be counted a second time when it is rescanned.  Take the
       advance back so the offsets do not drift by one per plain text word. */
    YY_USER_ACTION_ROLLBACK;
    BEGIN(INITIAL);
    return T_STRING;
}
<SS_PT><<EOF>> {
/* end of input also ends a plain text word: return what was collected */
cfg_buf_content(CTX->buf, &yylval->cpString, NULL, NULL);
BEGIN(INITIAL);
return T_STRING;
}
<INITIAL><<EOF>> {
/* end of scanning: no construct is open, so this is the regular end of input */
yyterminate();
}
%%
/* external scanner state transitions */

/*
 * Push a scanner state, selected by name, onto the state stack of the
 * scanner stored in ctx->yyscan.  "SS_SQ" is the only name recognised;
 * any other name leaves the scanner untouched.
 */
void cfg_syn_scan_push(cfg_syn_ctx_t *ctx, const char *state);
void cfg_syn_scan_push(cfg_syn_ctx_t *ctx, const char *state)
{
    /* unknown state names are silently ignored */
    if (strcmp(state, "SS_SQ") != 0)
        return;

    yy_push_state(SS_SQ, ctx->yyscan);
}
/*
 * Pop the top entry off the state stack of the scanner stored in
 * ctx->yyscan; the counterpart of cfg_syn_scan_push().
 */
void cfg_syn_scan_pop(cfg_syn_ctx_t *ctx);
void cfg_syn_scan_pop(cfg_syn_ctx_t *ctx)
{
yy_pop_state(ctx->yyscan);
}
/*
 * Buffer-based input routine (the scanner reaches it through YY_INPUT).
 *
 * Copies at most max_size bytes of the not yet consumed part of the input
 * (from ctx->inputptr up to ctx->inputbuf + ctx->inputlen) into buf and
 * moves ctx->inputptr past them.
 *
 * Returns the number of bytes copied, or YY_NULL when nothing is left
 * (or max_size is not positive).
 */
static int yyinput(cfg_syn_ctx_t *ctx, char *buf, int max_size)
{
    int avail = (ctx->inputbuf + ctx->inputlen - ctx->inputptr);
    int count = (avail > max_size) ? max_size : avail;

    if (count <= 0)
        return YY_NULL;

    memcpy(buf, ctx->inputptr, count);
    ctx->inputptr += count;
    return count;
}
/*
 * Closing brace: map an opening delimiter to its closing counterpart.
 *
 * The four bracket characters '(', '{', '[' and '<' map to ')', '}', ']'
 * and '>' respectively.  Every other character closes itself, so it is
 * returned unchanged.
 */
static char closing_brace(char open)
{
    static const struct {
        char open;
        char close;
    } openclose[] = {
        { '(', ')' },
        { '{', '}' },
        { '[', ']' },
        { '<', '>' },
    };
    size_t i;

    for (i = 0; i < sizeof(openclose)/sizeof(openclose[0]); i++) {
        if (openclose[i].open == open)
            return openclose[i].close;
    }
    /* not a bracket: the delimiter is its own closing character */
    return open;
}
/*
 * Convert a hex digit into a nibble.
 *
 * Returns the value 0..15 of the digit ('0'-'9', 'a'-'f' or 'A'-'F'),
 * or -1 if the character is not a hexadecimal digit.  The result is
 * computed directly as an int so that the -1 sentinel is not turned
 * into 255 by passing through an unsigned char.
 */
static int hex_nibble(const char hex)
{
    if (hex >= '0' && hex <= '9')
        return hex - '0';
    if (hex >= 'a' && hex <= 'f')
        return hex - 'a' + 10;
    if (hex >= 'A' && hex <= 'F')
        return hex - 'A' + 10;
    return -1;
}
/*
 * Convert a hex digit sequence into an octet stream.
 *
 * Reads in_len hex digits starting at in_ptr and appends the octets they
 * encode to ctx->buf.  When the number of digits is odd, the first digit
 * forms an octet on its own; all following digits are consumed in pairs,
 * high nibble first.
 *
 * Always returns 1.
 */
static int hex_sequence(cfg_syn_ctx_t *ctx, const char *in_ptr, size_t in_len)
{
    const char *cursor = in_ptr;
    size_t remaining = in_len;
    int octet;

    /* odd digit count: the leading digit is emitted as its own octet */
    if ((remaining % 2) != 0) {
        octet = hex_nibble(cursor[0]);
        cfg_buf_append(ctx->buf, NULL, 0, (char)octet);
        cursor += 1;
        remaining -= 1;
    }

    /* the rest is an even number of digits: two digits per octet */
    while (remaining > 0) {
        octet = ((hex_nibble(cursor[0]) << 4) | (hex_nibble(cursor[1])));
        cfg_buf_append(ctx->buf, NULL, 0, (char)octet);
        cursor += 2;
        remaining -= 2;
    }

    return 1;
}