ossp-pkg/cfg/cfg_syn_scan.l
1.8
%{
/*
** OSSP cfg - Configuration Parsing
** Copyright (c) 1999-2002 Ralf S. Engelschall <rse@engelschall.com>
** Copyright (c) 1999-2002 The OSSP Project (http://www.ossp.org/)
** Copyright (c) 2001-2002 Cable & Wireless Deutschland (http://www.cw.com/de/)
**
** This file is part of OSSP cfg, a configuration parsing
** library which can be found at http://www.ossp.org/pkg/lib/cfg/.
**
** Permission to use, copy, modify, and distribute this software for
** any purpose with or without fee is hereby granted, provided that
** the above copyright notice and this permission notice appear in all
** copies.
**
** THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
** WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
** MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
** IN NO EVENT SHALL THE AUTHORS AND COPYRIGHT HOLDERS AND THEIR
** CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
** SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
** LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
** USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
** ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
** OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
** OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
** SUCH DAMAGE.
**
** cfg_syn_scan.l: regular grammar specification for GNU Flex
**
** ATTENTION: This requires GNU Flex 2.5.6 or newer!
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "cfg.h"
#include "cfg_syn.h"
#include "cfg_syn_parse.h"
/* how to find our own context: the scanner's "extra" pointer, fetched with
   yyget_extra() from the scanner instance yy_globals, is used as a
   cfg_syn_ctx_t pointer */
#define CTX ((cfg_syn_ctx_t *)yyget_extra(yy_globals))
/* provide own input handling: the default YY_INPUT is replaced so that the
   scanner obtains its input through yyinput() on our context; "result"
   receives whatever yyinput() returns.
   NOTE(review): YY_NO_UNPUT presumably just suppresses the unused yyunput()
   helper in the generated scanner -- confirm against the Flex version in use */
#define YY_NO_UNPUT 1
#undef YY_INPUT
#define YY_INPUT(buf,result,max_size) (result = yyinput(CTX, buf, max_size))
static int yyinput(cfg_syn_ctx_t *ctx, char *buf, int max_size);
/* location tracking: yylloc->first and yylloc->last are advanced by the
   length of every match (yyleng), so after a match they delimit the text
   of that match as offsets counted from the start of scanning */
/* start counting at offset zero */
#define YY_USER_INIT \
yylloc->first = 0; \
yylloc->last = 0;
/* the new match starts where the previous one ended and spans yyleng characters */
#define YY_USER_ACTION \
yylloc->first = yylloc->last; \
yylloc->last += yyleng;
/* undo the advance done by YY_USER_ACTION for the current match
   NOTE(review): not referenced by any rule visible in this file; it looks
   intended for actions that give text back with yyless() -- confirm */
#define YY_USER_ACTION_ROLLBACK \
yylloc->last = yylloc->first
/* helpers defined in the user code section at the end of this file */
/* closing delimiter that belongs to an opening delimiter of a flexible-quoted word */
static char closing_brace(char open);
/* numeric value of a single hexadecimal digit */
static int hex_nibble(const char hex);
/* decode in_len hexadecimal digits into at most out_len octets; returns the number of octets stored */
static int hex_sequence(char *out_ptr, size_t out_len, const char *in_ptr, size_t in_len);
%}
/* scanner options:
   pointer           - yytext is a pointer into the scan buffer
   stack             - enables the start condition stack used by
                       yy_push_state() and yy_pop_state() below
   reentrant-bison   - NOTE(review): presumably generates a reentrant scanner
                       whose yylex receives yylval and yylloc from a Bison
                       parser; confirm against the Flex version in use
   never-interactive - the input is never treated as interactive
   noyywrap          - no yywrap() is called, end of input ends scanning */
%pointer
%option stack
%option reentrant-bison
%option never-interactive
%option noyywrap
/* scanner states (all exclusive, i.e. only rules carrying the state apply):
   SS_DQ   - inside a double-quoted word
   SS_SQ   - inside a single-quoted word
   SS_FQ   - inside a flexible-quoted word
   SS_PT   - inside a plain text word
   SS_CO_C - inside a C-style block comment */
%x SS_DQ
%x SS_SQ
%x SS_FQ
%x SS_PT
%x SS_CO_C
%%
/* local variables of the scanning routine:
   caStr       - fixed-size buffer in which the text of the current word is
                 assembled before it is duplicated into yylval->cpString
   cpStr       - current write position inside caStr
   nQuoteOpen  - number of currently open delimiters of a flexible-quoted word
   cQuoteOpen  - opening delimiter of the current flexible-quoted word
   cQuoteClose - closing delimiter as determined by closing_brace()
   NOTE(review): none of the rules below checks cpStr against the end of
   caStr, so a word longer than the buffer presumably overruns it -- confirm
   and add bounds checks.
   NOTE(review): cpStr is pointed at caStr only by the opening rules in the
   INITIAL state; a state entered through cfg_syn_scan_push() would presumably
   start with cpStr == NULL -- verify against the callers. */
char caStr[1024];
char *cpStr = NULL;
int nQuoteOpen = 0;
char cQuoteOpen = '\0';
char cQuoteClose = '\0';
/* whitespace (blanks, tabs and newlines) between words is skipped */
[ \t\n]+ {
/* no-op: nothing is returned to the parser */
}
/* C-style block comment: everything up to the closing marker is discarded */
"/*" {
/* comment opener: switch to the comment state */
BEGIN(SS_CO_C);
}
<SS_CO_C>"*/" {
/* comment closer: resume normal scanning */
BEGIN(INITIAL);
}
<SS_CO_C>(.|\n) {
/* no-op: comment content is dropped one character at a time */
}
<SS_CO_C><<EOF>> {
/* the input ended before the comment was closed: report it and return 0 */
cfg_syn_error(CTX, CFG_ERR_SYN, yylloc, "unterminated C-style block comment");
return 0;
}
/* C++-style EOL comment: discarded up to, but not including, the newline */
"//"[^\n]* {
/* no-op */
}
/* Shell-style EOL comment: discarded up to, but not including, the newline */
"#"[^\n]* {
/* no-op */
}
/* double-quoted word ("..."); backslash escape sequences are interpreted */
\" {
/* opening quote: start assembling a new word at the beginning of caStr */
cpStr = caStr;
BEGIN(SS_DQ);
}
<SS_DQ>\" {
/* closing quote: terminate the assembled text and return a strdup copy
   of it as T_STRING (the result of strdup is not checked) */
*cpStr = '\0';
yylval->cpString = strdup(caStr);
BEGIN(INITIAL);
return T_STRING;
}
<SS_DQ>\\\n[ \t]* {
/* no-op: a backslash-newline and the blanks and tabs following it are
   dropped, which joins the two lines */
}
<SS_DQ>\\[0-7]{1,3} {
/* octal escape: one to three octal digits following the backslash */
unsigned int result;
(void)sscanf(yytext+1, "%o", &result);
/* three octal digits can encode values up to 0777, more than one octet holds */
if (result > 0xff) {
cfg_syn_error(CTX, CFG_ERR_SYN, yylloc, "escape sequence out of bound");
return 0;
}
*cpStr++ = result;
}
<SS_DQ>\\x\{[0-9a-fA-F]+\} {
/* braced hex escape of arbitrary length: yytext+3 skips the backslash, the x
   and the opening brace, yyleng-3-1 additionally excludes the closing brace;
   hex_sequence returns the number of octets stored, which advances cpStr */
cpStr += hex_sequence(cpStr, sizeof(caStr)-(cpStr-caStr), yytext+3, yyleng-3-1);
}
<SS_DQ>\\x[0-9a-fA-F]{2} {
/* two-digit hex escape: the digits start right after the backslash and the x */
cpStr += hex_sequence(cpStr, sizeof(caStr)-(cpStr-caStr), yytext+2, 2);
}
<SS_DQ>\\n { *cpStr++ = '\n'; }
<SS_DQ>\\r { *cpStr++ = '\r'; }
<SS_DQ>\\t { *cpStr++ = '\t'; }
<SS_DQ>\\b { *cpStr++ = '\b'; }
<SS_DQ>\\f { *cpStr++ = '\f'; }
<SS_DQ>\\a { *cpStr++ = '\007'; }
<SS_DQ>\\e { *cpStr++ = '\033'; }
<SS_DQ>\\(.|\n) {
/* any other escaped character stands for itself, the backslash is dropped */
*cpStr++ = yytext[1];
}
<SS_DQ>[^\\\"]+ {
/* a run of characters other than backslash and double quote is copied verbatim */
char *cp = yytext;
while (*cp != '\0')
*cpStr++ = *cp++;
}
<SS_DQ>(.|\n) {
    /* fallback for a single character no other SS_DQ rule accepts (with the
       rules of this state that is a backslash which is the very last
       character of the input). The pattern matches exactly one character,
       so the character is yytext[0]; yytext[1] is already the terminating
       NUL of the match and must not be stored. */
    *cpStr++ = yytext[0];
}
<SS_DQ><<EOF>> {
    /* the input ended before the closing quote: report it and return 0 */
    cfg_syn_error(CTX, CFG_ERR_SYN, yylloc, "unterminated double-quoted string");
    return 0;
}
/* single-quoted word ('...'); only the backslash and the single quote
   can be escaped, every other backslash sequence is kept literally */
\' {
/* opening quote: start assembling a new word at the beginning of caStr */
cpStr = caStr;
BEGIN(SS_SQ);
}
<SS_SQ>\' {
/* closing quote: terminate the assembled text and return a strdup copy
   of it as T_STRING (the result of strdup is not checked) */
*cpStr = '\0';
yylval->cpString = strdup(caStr);
BEGIN(INITIAL);
return T_STRING;
}
<SS_SQ>\\\n[ \t]* {
/* no-op: a backslash-newline and the blanks and tabs following it are
   dropped, which joins the two lines */
}
<SS_SQ>\\[\\\'] {
/* escaped backslash or single quote: only the escaped character is kept */
*cpStr++ = yytext[1];
}
<SS_SQ>\\[^\\\'] {
/* any other character after a backslash: both characters are kept */
*cpStr++ = yytext[0];
*cpStr++ = yytext[1];
}
<SS_SQ>[^\\\']+ {
/* a run of characters other than backslash and single quote is copied verbatim */
char *cp = yytext;
while (*cp != '\0')
*cpStr++ = *cp++;
}
<SS_SQ>(.|\n) {
    /* fallback for a single character no other SS_SQ rule accepts (with the
       rules of this state that is a backslash which is the very last
       character of the input). The pattern matches exactly one character,
       so the character is yytext[0]; yytext[1] is already the terminating
       NUL of the match and must not be stored. */
    *cpStr++ = yytext[0];
}
<SS_SQ><<EOF>> {
    /* the input ended before the closing quote: report it and return 0 */
    cfg_syn_error(CTX, CFG_ERR_SYN, yylloc, "unterminated single-quoted string");
    return 0;
}
/* flexible-quoted word (q(.)[^\1]\1)
the delimiting character has to be a special character c, i.e.,
one of !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ or in C code:
isprint(c) && !isspace(c) && !iscntrl(c) && !isalpha(c) && !isdigit(c)
the closing delimiter is determined by closing_brace(); when it differs
from the opening one, delimiters inside the word are counted as nesting levels
*/
"q"[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~] {
/* opener: remember both delimiters and start with one open level */
cpStr = caStr;
nQuoteOpen = 1;
cQuoteOpen = yytext[1];
cQuoteClose = closing_brace(yytext[1]);
BEGIN(SS_FQ);
}
<SS_FQ>\\[\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~] {
/* backslash followed by a special character: an escaped delimiter is stored
   without its backslash and is not counted; any other special character is
   stored together with its backslash */
if (yytext[1] == cQuoteOpen || yytext[1] == cQuoteClose) {
*cpStr++ = yytext[1];
}
else {
*cpStr++ = yytext[0];
*cpStr++ = yytext[1];
}
}
<SS_FQ>\\\n[ \t]* {
/* no-op: a backslash-newline and the blanks and tabs following it are
   dropped, which joins the two lines */
}
<SS_FQ>[^\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~] {
/* a single non-special character is copied verbatim (the pattern has no
   repetition, so the loop copies exactly one character) */
char *cp = yytext;
while (*cp != '\0')
*cpStr++ = *cp++;
}
<SS_FQ>(.|\n) {
/* everything the rules above did not take, i.e. the special characters
   including the delimiters themselves */
if (yytext[0] == cQuoteOpen || yytext[0] == cQuoteClose) {
/* distinct delimiters raise and lower the nesting level, identical
   delimiters toggle between open and closed */
if (cQuoteOpen != cQuoteClose)
nQuoteOpen += (yytext[0] == cQuoteOpen ? 1 : -1);
else
nQuoteOpen = ((nQuoteOpen + 1) % 2);
}
/* the closing delimiter at level zero ends the word and returns it as
   T_STRING; every other character is part of the word */
if (yytext[0] == cQuoteClose && nQuoteOpen == 0) {
*cpStr = '\0';
yylval->cpString = strdup(caStr);
BEGIN(INITIAL);
return T_STRING;
}
else
*cpStr++ = yytext[0];
}
<SS_FQ><<EOF>> {
/* the input ended before the closing delimiter: report it and return 0 */
cfg_syn_error(CTX, CFG_ERR_SYN, yylloc, "unterminated flexible-quoted string");
return 0;
}
/* special tokens: single characters returned to the parser as tokens of
   their own (T_SEP, T_OPEN and T_CLOSE come from cfg_syn_parse.h) */
";" { return T_SEP; }
"{" { return T_OPEN; }
"}" { return T_CLOSE; }
/* plain text word: any character not handled by the rules above starts an
   unquoted word, which is then collected in the SS_PT state */
\\\n[ \t]* {
/* no-op: a backslash-newline and the blanks and tabs following it are
   dropped, which joins the two lines */
}
(.|\n) {
/* first character of a plain word: store it and collect the rest in SS_PT */
cpStr = caStr;
*cpStr++ = yytext[0];
BEGIN(SS_PT);
}
<SS_PT>\\\n[ \t]* {
/* no-op: a line continuation inside a word does not end the word */
}
<SS_PT>[^ \t\n;{}\\"']+ {
/* a run of characters that cannot end a plain word is copied verbatim */
char *cp = yytext;
while (*cp != '\0')
*cpStr++ = *cp++;
}
<SS_PT>(.|\n) {
    /* a character that cannot be part of a plain word ends the word:
       return the collected text as T_STRING and give the character back
       with yyless(0), so that it is scanned again in the INITIAL state */
    *cpStr = '\0';
    yylval->cpString = strdup(caStr);
    yyless(0);
    BEGIN(INITIAL);
    return T_STRING;
}
<SS_PT><<EOF>> {
    /* the end of the input ends a plain word as well. SS_PT is an exclusive
       state, so without this rule the default end-of-file action would
       terminate the scanner and the last word of an input that does not end
       in a delimiter would be lost. The next call of the scanner meets the
       end of the input again in the INITIAL state and terminates there. */
    *cpStr = '\0';
    yylval->cpString = strdup(caStr);
    BEGIN(INITIAL);
    return T_STRING;
}
%%
/*
 * External scanner state transitions.
 *
 * cfg_syn_scan_push: push a start state, given by its name, onto the start
 * condition stack of the scanner stored in ctx->yyscan. The only name
 * acted upon is "SS_SQ"; any other name leaves the scanner untouched.
 */
void cfg_syn_scan_push(cfg_syn_ctx_t *ctx, const char *state);
void cfg_syn_scan_push(cfg_syn_ctx_t *ctx, const char *state)
{
    /* names other than the single-quoted state are ignored */
    if (strcmp(state, "SS_SQ") != 0)
        return;
    yy_push_state(SS_SQ, ctx->yyscan);
}
/* pop the topmost start state from the start condition stack of the
   scanner stored in ctx->yyscan (counterpart of cfg_syn_scan_push) */
void cfg_syn_scan_pop(cfg_syn_ctx_t *ctx);
void cfg_syn_scan_pop(cfg_syn_ctx_t *ctx)
{
yy_pop_state(ctx->yyscan);
}
/*
 * Buffer-based input routine (installed through YY_INPUT).
 *
 * Copies the next piece of the in-memory input described by ctx
 * (inputbuf, inputlen, inputptr) into buf, at most max_size bytes at a
 * time, and advances ctx->inputptr past the copied bytes. Returns the
 * number of bytes copied, or YY_NULL when there is nothing left to copy.
 */
static int yyinput(cfg_syn_ctx_t *ctx, char *buf, int max_size)
{
    /* bytes between the read position and the end of the input */
    int avail = (ctx->inputbuf + ctx->inputlen - ctx->inputptr);
    /* never hand out more than the scanner asked for */
    int chunk = (avail > max_size) ? max_size : avail;

    if (chunk <= 0)
        return YY_NULL;
    memcpy(buf, ctx->inputptr, chunk);
    ctx->inputptr += chunk;
    return chunk;
}
/*
 * Determine the closing delimiter of a flexible-quoted word.
 *
 * For the bracket-like opening delimiters '(', '{', '[' and '<' the
 * matching closing bracket is returned. Every other character closes
 * itself, so "open" is returned unchanged.
 */
static char closing_brace(char open)
{
    static const struct {
        char open;
        char close;
    } openclose[] = {
        { '(', ')' },
        { '{', '}' },
        { '[', ']' },   /* opening character has to be '[', not a second '{' */
        { '<', '>' }
    };
    size_t i;

    for (i = 0; i < sizeof(openclose)/sizeof(openclose[0]); i++) {
        if (openclose[i].open == open)
            return openclose[i].close;
    }
    return open;
}
/*
 * Convert a hexadecimal digit into a nibble.
 *
 * Returns the value 0..15 of the digit for [0-9a-fA-F] and -1 for any
 * other character. The result is kept in an int throughout, so the -1
 * is really returned as -1 and not as 255.
 */
static int hex_nibble(const char hex)
{
    if (hex >= '0' && hex <= '9')
        return hex - '0';
    if (hex >= 'a' && hex <= 'f')
        return hex - 'a' + 10;
    if (hex >= 'A' && hex <= 'F')
        return hex - 'A' + 10;
    return -1;
}

/*
 * Convert a hex digit sequence into an octet stream.
 *
 * out_ptr, out_len: destination buffer and the room left in it
 * in_ptr, in_len:   the hexadecimal digits (no terminator required); the
 *                   caller has to pass hexadecimal digits only, the octets
 *                   produced from other characters are unspecified
 *
 * With an odd number of digits the first digit forms an octet on its own,
 * all following digits are taken in pairs. Conversion stops silently as
 * soon as out_len octets have been stored; nothing is ever written beyond
 * out_ptr[out_len-1]. No terminating NUL is appended.
 *
 * Returns the number of octets stored.
 */
static int hex_sequence(char *out_ptr, size_t out_len, const char *in_ptr, size_t in_len)
{
    size_t out_max = out_len;
    unsigned int hi;
    unsigned int lo;

    /* odd number of digits: the leading digit is an octet of its own */
    if (in_len % 2 != 0) {
        /* without this check out_len would wrap around below zero and the
           loop below would write without any limit */
        if (out_len == 0)
            return 0;
        lo = (unsigned int)hex_nibble(in_ptr[0]);
        *out_ptr++ = (char)(unsigned char)(lo & 0x0fU);
        out_len--;
        in_ptr++;
        in_len--;
    }

    /* remaining digits in pairs; the nibbles are combined as unsigned
       values because shifting a negative int would be undefined */
    while (in_len >= 2 && out_len > 0) {
        hi = (unsigned int)hex_nibble(in_ptr[0]);
        lo = (unsigned int)hex_nibble(in_ptr[1]);
        *out_ptr++ = (char)(unsigned char)(((hi << 4) | lo) & 0xffU);
        out_len--;
        in_ptr += 2;
        in_len -= 2;
    }
    return (int)(out_max - out_len);
}