OSSP CVS Repository

ossp - Check-in [4879]
Not logged in
[Honeypot]  [Browse]  [Home]  [Login]  [Reports
[Search]  [Ticket]  [Timeline
  [Patchset]  [Tagging/Branching

Check-in Number: 4879
Date: 2004-Nov-28 18:23:45 (local)
2004-Nov-28 17:23:45 (UTC)
User:rse
Branch:
Comment: Replace fixed-size token buffer in scanner by a dynamic buffer (cfg_buf_t). This eliminates the old 1024 size limit on tokens and makes the scanner more robust.
Tickets:
Inspections:
Files:
ossp-pkg/cfg/ChangeLog      1.25 -> 1.26     5 inserted, 0 deleted
ossp-pkg/cfg/TODO      1.6 -> 1.7     3 inserted, 4 deleted
ossp-pkg/cfg/cfg_syn.c      1.22 -> 1.23     10 inserted, 0 deleted
ossp-pkg/cfg/cfg_syn.h      1.7 -> 1.8     2 inserted, 0 deleted
ossp-pkg/cfg/cfg_syn_scan.l      added-> 1.21

ossp-pkg/cfg/ChangeLog 1.25 -> 1.26

--- ChangeLog    2004/11/28 14:17:52     1.25
+++ ChangeLog    2004/11/28 17:23:45     1.26
@@ -10,6 +10,11 @@
 
  Changes between 0.9.5 and 0.9.6 (27-Nov-2004 to xx-Dec-2004):
 
+   *) Replace fixed-size token buffer in scanner by a dynamic
+      buffer (cfg_buf_t). This eliminates the old 1024 size limit
+      on tokens and makes the scanner more robust.
+      [Ralf S. Engelschall <rse@engelschall.com>]
+
    *) Fix annotational error reporting in scanner/parser.
       [Ralf S. Engelschall <rse@engelschall.com>]
 


ossp-pkg/cfg/TODO 1.6 -> 1.7

--- TODO 2004/11/28 13:54:13     1.6
+++ TODO 2004/11/28 17:23:45     1.7
@@ -22,10 +22,9 @@
   o add line tracking support
   o add pre-processor with at least includes
   o add config tree syntax verification!
-  o cfg.tok: in scanner: use an combination of dynamic buffer and atomic
-    symbol/token sub-library to allow first tokens of mostly arbitrary
-    size and then to store the tokens redundancy-free.
-    alternative: replace caStr/cpStr durch yyless/yymore usage??
+  o cfg.tok: in scanner: use atomic symbol/token sub-library to allow
+    first tokens of mostly arbitrary size and then to store the tokens
+    redundancy-free. alternative: replace caStr/cpStr durch yyless/yymore usage??
   o named parameters [--]name=value (options)??
   o command line query tool (based on cfg_test.c)
   o optional OSSP ex support


ossp-pkg/cfg/cfg_syn.c 1.22 -> 1.23

--- cfg_syn.c    2004/11/28 14:17:52     1.22
+++ cfg_syn.c    2004/11/28 17:23:45     1.23
@@ -57,6 +57,8 @@
     size_t err_len)
 {
     cfg_syn_ctx_t ctx;
+    cfg_buf_t *buf;
+    cfg_rc_t rc;
     void *yyscan;
 
     /* argument sanity checking */
@@ -67,6 +69,10 @@
     cfg_syn_lex_init(&yyscan);
     cfg_syn_set_extra(&ctx, yyscan);
 
+    /* initialize temporary buffer context */
+    if ((rc = cfg_buf_create(&buf)) != CFG_OK)
+        return rc;
+
     /* establish our own context which is passed
        through the parser and scanner */
     ctx.inputptr = in_ptr;
@@ -75,6 +81,7 @@
     ctx.cfg      = cfg;
     ctx.node     = NULL;
     ctx.rv       = CFG_OK;
+    ctx.buf      = buf;
     ctx.err_buf  = err_buf;
     ctx.err_len  = err_len;
     ctx.yyscan   = yyscan;
@@ -83,6 +90,9 @@
     if (cfg_syn_parse(&ctx))
         ctx.rv = (ctx.rv == CFG_OK ? CFG_ERR_INT : ctx.rv);
 
+    /* destroy temporary buffer */
+    cfg_buf_destroy(buf);
+
     /* destroy scanner */
     cfg_syn_lex_destroy(yyscan);
 


ossp-pkg/cfg/cfg_syn.h 1.7 -> 1.8

--- cfg_syn.h    2004/11/20 12:02:32     1.7
+++ cfg_syn.h    2004/11/28 17:23:45     1.8
@@ -37,6 +37,7 @@
 #include "cfg.h"
 #include "cfg_grid.h"
 #include "cfg_node.h"
+#include "cfg_buf.h"
 
 /* internal specification scanner/parser context */
 typedef struct {
@@ -46,6 +47,7 @@
     cfg_t        *cfg;        /* the configuration object */
     cfg_node_t   *node;       /* top-level/root/result channel */
     cfg_rc_t      rv;         /* return value */
+    cfg_buf_t    *buf;        /* temporary buffer */
     char         *err_buf;    /* error buffer pointer */
     size_t        err_len;    /* error buffer length */
     void         *yyscan;     /* Flex scanner context */


ossp-pkg/cfg/cfg_syn_scan.l -> 1.21

*** /dev/null    Sat Nov 23 01:31:01 2024
--- -    Sat Nov 23 01:31:02 2024
***************
*** 0 ****
--- 1,381 ----
+ %{
+ /*
+ **  OSSP cfg - Configuration Parsing
+ **  Copyright (c) 2002-2004 Ralf S. Engelschall <rse@engelschall.com>
+ **  Copyright (c) 2002-2004 The OSSP Project (http://www.ossp.org/)
+ **  Copyright (c) 2002-2004 Cable & Wireless (http://www.cw.com/)
+ **
+ **  This file is part of OSSP cfg, a configuration parsing
+ **  library which can be found at http://www.ossp.org/pkg/lib/cfg/.
+ **
+ **  Permission to use, copy, modify, and distribute this software for
+ **  any purpose with or without fee is hereby granted, provided that
+ **  the above copyright notice and this permission notice appear in all
+ **  copies.
+ **
+ **  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ **  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ **  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ **  IN NO EVENT SHALL THE AUTHORS AND COPYRIGHT HOLDERS AND THEIR
+ **  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ **  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ **  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ **  USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ **  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ **  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ **  OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ **  SUCH DAMAGE.
+ **
+ **  cfg_syn_scan.l: regular grammar specification for GNU Flex
+ **
+ **  ATTENTION: This requires GNU Flex 2.5.10 or newer!
+ */
+ 
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
+ 
+ #include "cfg.h"
+ #include "cfg_global.h"
+ #include "cfg_buf.h"
+ #include "cfg_syn.h"
+ #include "cfg_syn_parse.h"
+ 
+ /* how to find our own context */
+ #define CTX ((cfg_syn_ctx_t *)yyget_extra(yyscanner))
+ 
+ /* provide own input handling */
+ #undef  YY_INPUT
+ #define YY_INPUT(buf,result,max_size) (result = yyinput(CTX, buf, max_size))
+ static int yyinput(cfg_syn_ctx_t *ctx, char *buf, int max_size);
+ 
+ /* location tracking */
+ #define YY_USER_INIT \
+     yylloc->first = 0; \
+     yylloc->last  = 0;
+ #define YY_USER_ACTION \
+     yylloc->first = yylloc->last; \
+     yylloc->last += yyleng;
+ #define YY_USER_ACTION_ROLLBACK \
+     yylloc->last  = yylloc->first
+ 
+ static char closing_brace(char open);
+ static int hex_nibble(const char hex);
+ static int hex_sequence(cfg_syn_ctx_t *ctx, const char *in_ptr, size_t in_len);
+ 
+ %}
+ 
+ /* scanner options */
+ %pointer
+ %option stack
+ %option reentrant
+ %option bison-bridge
+ %option bison-locations
+ %option never-interactive
+ %option noyywrap
+ %option nounput
+ %option noyy_top_state
+ 
+ /* scanner states */
+ %x SS_DQ
+ %x SS_SQ
+ %x SS_FQ
+ %x SS_PT
+ %x SS_CO_C
+ 
+ /* the delimiting character for flexible quoting has to be a
+    special character c, i.e., one of !"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ or
+    in C code: isprint(c) && !isspace(c) && !iscntrl(c) && !isalpha(c) && !isdigit(c) */
+ FQDEL   [\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~]
+ FQDELN [^\!\"\#\$\%\&\'\(\)\*\+\,\-\.\/\:\;\<\=\>\?\@\[\\\]\^\_\`\{\|\}\~]
+ 
+ %%
+ 
+     /* local variables
+        (NOTE(review): placed at the top of the rules section, so presumably
+        local to the generated scanner function and re-initialized on each
+        call -- confirm against the Flex manual) */
+     int   nCommentOpen = 0;
+     int   nQuoteOpen = 0;
+     char  cQuoteOpen = '\0';
+     char  cQuoteClose = '\0';
+ 
+     /* whitespaces: consumed without producing a token */
+ [ \t\n]+ {
+     /* no-op */
+ }
+ 
+     /* C-style block comment; nCommentOpen counts the nesting depth, so
+        block comments may be nested */
+ "/*" {
+     /* outermost comment opener: enter the comment start condition */
+     nCommentOpen = 1;
+     BEGIN(SS_CO_C);
+ }
+ <SS_CO_C>"/*" {
+     /* nested opener: one more level to close */
+     nCommentOpen++;
+ }
+ <SS_CO_C>"*/" {
+     /* closer: leave the comment state only once all levels are closed */
+     nCommentOpen--;
+     if (nCommentOpen == 0)
+         BEGIN(INITIAL);
+ }
+ <SS_CO_C>(.|\n) {
+     /* no-op: comment text is discarded one character at a time */
+ }
+ <SS_CO_C><<EOF>> {
+     /* input ended inside a comment: report a syntax error and stop */
+     cfg_syn_error(CTX, CFG_ERR_SYN, yylloc, "unterminated C-style block comment");
+     return 0;
+ }
+ 
+     /* C++-style EOL comment: everything up to (not including) the newline */
+ "//"[^\n]* {
+     /* no-op */
+ }
+ 
+     /* Shell-style EOL comment: everything up to (not including) the newline */
+ "#"[^\n]* {
+     /* no-op */
+ }
+ 
+     /* double-quoted word ("..."): the token text is accumulated piecewise
+        in CTX->buf while in start condition SS_DQ and is handed to the
+        parser as T_STRING when the closing quote is seen */
+ \" {
+     /* opening quote: resize the token buffer to 0 and start collecting */
+     cfg_buf_resize(CTX->buf, 0);
+     BEGIN(SS_DQ);
+ }
+ <SS_DQ>\" {
+     /* closing quote: fetch the buffer content into the semantic value */
+     cfg_buf_content(CTX->buf, &yylval->cpString, NULL, NULL);
+     BEGIN(INITIAL);
+     return T_STRING;
+ }
+ <SS_DQ>\\\n[ \t]* {
+     /* no-op: backslash-newline plus following indentation is a line
+        continuation and contributes nothing to the token */
+ }
+ <SS_DQ>\\[0-7]{1,3} {
+     /* octal escape: up to three digits, so the value can reach 0777 and
+        has to be range-checked before it is stored as a single octet */
+     unsigned int result;
+     (void)sscanf(yytext+1, "%o", &result);
+     if (result > 0xff) {
+         cfg_syn_error(CTX, CFG_ERR_SYN, yylloc, "escape sequence out of bound");
+         return 0;
+     }
+     cfg_buf_append(CTX->buf, NULL, 0, (char)result);
+ }
+ <SS_DQ>\\x\{[0-9a-fA-F]+\} {
+     /* braced hex escape: skip the 3 leading characters (backslash, x,
+        opening brace) and exclude the 1 trailing closing brace */
+     if (!hex_sequence(CTX, yytext+3, yyleng-3-1))
+         return 0;
+ }
+ <SS_DQ>\\x[0-9a-fA-F]{2} {
+     /* two-digit hex escape: skip the 2 leading characters (backslash, x) */
+     if (!hex_sequence(CTX, yytext+2, 2))
+         return 0;
+ }
+ <SS_DQ>\\n { cfg_buf_append(CTX->buf, NULL, 0, '\n');   }
+ <SS_DQ>\\r { cfg_buf_append(CTX->buf, NULL, 0, '\r');   }
+ <SS_DQ>\\t { cfg_buf_append(CTX->buf, NULL, 0, '\t');   }
+ <SS_DQ>\\b { cfg_buf_append(CTX->buf, NULL, 0, '\b');   }
+ <SS_DQ>\\f { cfg_buf_append(CTX->buf, NULL, 0, '\f');   }
+ <SS_DQ>\\a { cfg_buf_append(CTX->buf, NULL, 0, '\007'); }
+ <SS_DQ>\\e { cfg_buf_append(CTX->buf, NULL, 0, '\033'); }
+ <SS_DQ>\\(.|\n) {
+     /* any other escaped character stands for itself (backslash dropped) */
+     cfg_buf_append(CTX->buf, NULL, 0, yytext[1]);
+ }
+ <SS_DQ>[^\\\"]+ {
+     /* run of ordinary characters: appended as a whole */
+     cfg_buf_append(CTX->buf, yytext, 0, 0);
+ }
+ <SS_DQ>(.|\n) {
+     /* catch-all for a single character no other rule accepted; the match
+        is exactly one character long, so it is yytext[0] (yytext[1] would
+        be the terminating NUL of the match, not the character itself) */
+     cfg_buf_append(CTX->buf, NULL, 0, yytext[0]);
+ }
+ <SS_DQ><<EOF>> {
+     /* input ended before the closing quote: report a syntax error */
+     cfg_syn_error(CTX, CFG_ERR_SYN, yylloc, "unterminated double-quoted string");
+     return 0;
+ }
+ 
+     /* single-quoted word ('...'): the token text is accumulated piecewise
+        in CTX->buf while in start condition SS_SQ; only backslash and the
+        single quote itself can be escaped, every other backslash sequence
+        is kept literally */
+ \' {
+     /* opening quote: resize the token buffer to 0 and start collecting */
+     cfg_buf_resize(CTX->buf, 0);
+     BEGIN(SS_SQ);
+ }
+ <SS_SQ>\' {
+     /* closing quote: fetch the buffer content into the semantic value */
+     cfg_buf_content(CTX->buf, &yylval->cpString, NULL, NULL);
+     BEGIN(INITIAL);
+     return T_STRING;
+ }
+ <SS_SQ>\\\n[ \t]* {
+     /* no-op: backslash-newline plus following indentation is a line
+        continuation and contributes nothing to the token */
+ }
+ <SS_SQ>\\[\\\'] {
+     /* escaped backslash or quote: keep only the escaped character */
+     cfg_buf_append(CTX->buf, NULL, 0, yytext[1]);
+ }
+ <SS_SQ>\\[^\\\'] {
+     /* any other backslash sequence is kept as is (both characters) */
+     cfg_buf_append(CTX->buf, yytext, 2, 0);
+ }
+ <SS_SQ>[^\\\']+ {
+     /* run of ordinary characters: appended as a whole */
+     cfg_buf_append(CTX->buf, yytext, 0, 0);
+ }
+ <SS_SQ>(.|\n) {
+     /* catch-all for a single character no other rule accepted; the match
+        is exactly one character long, so it is yytext[0] (yytext[1] would
+        be the terminating NUL of the match, not the character itself) */
+     cfg_buf_append(CTX->buf, NULL, 0, yytext[0]);
+ }
+ <SS_SQ><<EOF>> {
+     /* input ended before the closing quote: report a syntax error */
+     cfg_syn_error(CTX, CFG_ERR_SYN, yylloc, "unterminated single-quoted string");
+     return 0;
+ }
+ 
+     /* flexible-quoted word (q(.)[^\1]\1): "q" followed by a delimiter
+        character opens the word; the closing delimiter is the value
+        closing_brace() returns for the opening one */
+ "q"{FQDEL} {
+     cfg_buf_resize(CTX->buf, 0);
+     nQuoteOpen = 1;                         /* one delimiter level is open */
+     cQuoteOpen = yytext[1];                 /* character following the "q" */
+     cQuoteClose = closing_brace(yytext[1]);
+     BEGIN(SS_FQ);
+ }
+ <SS_FQ>\\{FQDEL} {
+     /* escaped special character: an escaped open/close delimiter is stored
+        without the backslash, any other sequence is kept as both characters */
+     if (yytext[1] == cQuoteOpen || yytext[1] == cQuoteClose)
+         cfg_buf_append(CTX->buf, NULL, 0, yytext[1]);
+     else
+         cfg_buf_append(CTX->buf, yytext, 2, 0);
+ }
+ <SS_FQ>\\\n[ \t]* {
+     /* no-op: line continuation */
+ }
+ <SS_FQ>{FQDELN} {
+     /* a single character that is not in the delimiter class */
+     cfg_buf_append(CTX->buf, yytext, 0, 0);
+ }
+ <SS_FQ>(.|\n) {
+     /* remaining characters, i.e. unescaped members of the delimiter class */
+     if (yytext[0] == cQuoteOpen || yytext[0] == cQuoteClose) {
+         /* distinct open/close characters nest: count up on open and down
+            on close; identical characters just toggle between 1 and 0 */
+         if (cQuoteOpen != cQuoteClose)
+             nQuoteOpen += (yytext[0] == cQuoteOpen ? 1 : -1);
+         else
+             nQuoteOpen = ((nQuoteOpen + 1) % 2);
+     }
+     if (yytext[0] == cQuoteClose && nQuoteOpen == 0) {
+         /* the outermost closing delimiter ends the word */
+         cfg_buf_content(CTX->buf, &yylval->cpString, NULL, NULL);
+         BEGIN(INITIAL);
+         return T_STRING;
+     }
+     else
+         cfg_buf_append(CTX->buf, NULL, 0, yytext[0]);
+ }
+ <SS_FQ><<EOF>> {
+     /* input ended before the closing delimiter: report a syntax error */
+     cfg_syn_error(CTX, CFG_ERR_SYN, yylloc, "unterminated flexible-quoted string");
+     return 0;
+ }
+ 
+    /* special tokens */
+ ";" { return T_SEP;   }
+ "{" { return T_OPEN;  }
+ "}" { return T_CLOSE; }
+ 
+     /* plain text word: started by any character no earlier rule consumed
+        and continued in start condition SS_PT */
+ \\\n[ \t]* {
+     /* no-op: line continuation outside of any word */
+ }
+ (.|\n) {
+     /* first character of a plain word: reset the buffer and store it */
+     cfg_buf_resize(CTX->buf, 0);
+     cfg_buf_append(CTX->buf, NULL, 0, yytext[0]);
+     BEGIN(SS_PT);
+ }
+ <SS_PT>\\\n[ \t]* {
+     /* no-op: line continuation inside the word */
+ }
+ <SS_PT>[^ \t\n;{}\\"']+ {
+     /* run of characters that cannot end a plain word */
+     cfg_buf_append(CTX->buf, yytext, 0, 0);
+ }
+ <SS_PT>(.|\n) {
+     /* any other character ends the word; yyless(0) returns it to the
+        input so it is scanned again in the INITIAL state.
+        NOTE(review): YY_USER_ACTION has already advanced yylloc->last for
+        this character, and YY_USER_ACTION_ROLLBACK is defined in the
+        prologue but not invoked here -- confirm whether a rollback is
+        intended after yyless(0) */
+     cfg_buf_content(CTX->buf, &yylval->cpString, NULL, NULL);
+     yyless(0);
+     BEGIN(INITIAL);
+     return T_STRING;
+ }
+ <SS_PT><<EOF>> {
+     /* end of input also ends the word; the collected text is still
+        delivered as a token */
+     cfg_buf_content(CTX->buf, &yylval->cpString, NULL, NULL);
+     BEGIN(INITIAL);
+     return T_STRING;
+ }
+ 
+ <INITIAL><<EOF>> {
+     /* end of scanning */
+     yyterminate();
+ }
+ 
+ %%
+ 
+ /* external scanner state transitions: push the start condition named by
+    "state" onto the scanner's state stack; only "SS_SQ" is recognized,
+    every other name is silently ignored */
+ void cfg_syn_scan_push(cfg_syn_ctx_t *ctx, const char *state);
+ void cfg_syn_scan_push(cfg_syn_ctx_t *ctx, const char *state)
+ {
+     if (strcmp(state, "SS_SQ") != 0)
+         return;
+     yy_push_state(SS_SQ, ctx->yyscan);
+ }
+ /* pop the top-most start condition from the scanner's state stack */
+ void cfg_syn_scan_pop(cfg_syn_ctx_t *ctx);
+ void cfg_syn_scan_pop(cfg_syn_ctx_t *ctx)
+ {
+     void *scanner = ctx->yyscan;
+ 
+     yy_pop_state(scanner);
+ }
+ 
+ /* buffer-based input routine: copy at most max_size of the not yet
+    consumed input bytes into buf, advance the read position in the
+    context and return the number of bytes copied (YY_NULL once nothing
+    is left) */
+ static int yyinput(cfg_syn_ctx_t *ctx, char *buf, int max_size)
+ {
+     int avail = (ctx->inputbuf + ctx->inputlen - ctx->inputptr);
+     int chunk = (avail > max_size ? max_size : avail);
+ 
+     if (chunk <= 0)
+         return YY_NULL;
+     memcpy(buf, ctx->inputptr, chunk);
+     ctx->inputptr += chunk;
+     return chunk;
+ }
+ 
+ /* closing brace: return the delimiter that closes a flexible quote opened
+    with "open"; the bracket characters ( { [ < map to ) } ] >, every other
+    character closes itself */
+ static char closing_brace(char open)
+ {
+     static const struct {
+         char open;
+         char close;
+     } openclose[] = {
+         { '(', ')' },
+         { '{', '}' },
+         { '[', ']' },
+         { '<', '>' },
+     };
+     size_t i;
+ 
+     for (i = 0; i < sizeof(openclose)/sizeof(openclose[0]); i++) {
+         if (openclose[i].open == open)
+             return openclose[i].close;
+     }
+     return open;
+ }
+ 
+ /* convert a hex digit into a nibble: returns the value 0..15 of the
+    hexadecimal digit "hex" (either case), or -1 if it is not a hex digit */
+ static int hex_nibble(const char hex)
+ {
+     /* kept as int so that the -1 error value survives the return
+        (an unsigned char would turn it into 255) */
+     int nibble;
+ 
+     if (hex >= '0' && hex <= '9')
+         nibble = hex - '0';
+     else if (hex >= 'a' && hex <= 'f')
+         nibble = hex - 'a' + 10;
+     else if (hex >= 'A' && hex <= 'F')
+         nibble = hex - 'A' + 10;
+     else
+         nibble = -1;
+     return nibble;
+ }
+ 
+ /* convert a hex digit sequence into an octet stream: append the octets
+    described by the in_len hex digits at in_ptr to the context's token
+    buffer; always returns 1 */
+ static int hex_sequence(cfg_syn_ctx_t *ctx, const char *in_ptr, size_t in_len)
+ {
+     const char *cp = in_ptr;
+     size_t remain = in_len;
+     int octet;
+ 
+     /* odd number of digits: the first digit alone forms the first octet */
+     if ((remain % 2) != 0) {
+         octet = hex_nibble(cp[0]);
+         cfg_buf_append(ctx->buf, NULL, 0, (char)octet);
+         cp++;
+         remain--;
+     }
+     /* the rest is consumed pairwise: high nibble first, then low nibble */
+     while (remain > 0) {
+         octet = ((hex_nibble(cp[0]) << 4) | (hex_nibble(cp[1])));
+         cfg_buf_append(ctx->buf, NULL, 0, (char)octet);
+         cp += 2;
+         remain -= 2;
+     }
+     return 1;
+ }
+ 

CVSTrac 2.0.1