Index: ossp-pkg/l2/l2.3 RCS File: /v/ossp/cvs/ossp-pkg/l2/Attic/l2.3,v rcsdiff -q -kk '-r1.1' '-r1.2' -u '/v/ossp/cvs/ossp-pkg/l2/Attic/l2.3,v' 2>/dev/null --- l2.3 2001/05/10 19:46:01 1.1 +++ l2.3 2001/05/10 20:00:31 1.2 @@ -1,5 +1,5 @@ .\" Automatically generated by Pod::Man version 1.02 -.\" Tue May 8 21:21:11 2001 +.\" Thu May 10 22:00:18 2001 .\" .\" Standard preamble: .\" ====================================================================== @@ -141,632 +141,16 @@ .TH l2 3 "08-May-2001" "L2 0.1.0" "Logging Library" .UC .SH "NAME" -\&\fBStr\fR \- String Library +\&\fBL2\fR \- Logging Library .SH "VERSION" .IX Header "VERSION" -Str \s-10.1.0 (08-May-2001)\s0 +L2 L2_VERSION_STR .SH "SYNOPSIS" .IX Header "SYNOPSIS" -\&\fBstr_len\fR, -\&\fBstr_copy\fR, -\&\fBstr_dup\fR, -\&\fBstr_concat\fR, -\&\fBstr_splice\fR, -\&\fBstr_compare\fR, -\&\fBstr_span\fR, -\&\fBstr_locate\fR, -\&\fBstr_token\fR, -\&\fBstr_parse\fR, -\&\fBstr_format\fR, -\&\fBstr_hash\fR, -\&\fBstr_base64\fR. +\&... .SH "DESCRIPTION" .IX Header "DESCRIPTION" -The \fBStr\fR library is a generic string library written in \s-1ANSI\s0 C which -provides functions for handling, matching, parsing, searching and -formatting of C strings. So it can be considered as a superset of \s-1POSIX\s0 -\&\fIstring\fR\|(3), but its main intention is to provide a more convinient and -compact \s-1API\s0 plus a more generalized functionality. +The \fBL2\fR library is... .SH "FUNCTIONS" .IX Header "FUNCTIONS" The following functions are provided by the \fBStr\fR \s-1API:\s0 -.Ip "str_size_t \fBstr_len\fR(const char *\fIs\fR);" 4 -.IX Item "str_size_t str_len(const char *s);" -This function determines the length of string \fIs\fR, i.e., the number -of characters starting at \fIs\fR that precede the terminating \f(CW\*(C`NUL\*(C'\fR -character. It returns \f(CW\*(C`NULL\*(C'\fR if \fIs\fR is \f(CW\*(C`NULL\*(C'\fR. 
-.Ip "char *\fBstr_copy\fR(char *\fIs\fR, const char *\fIt\fR, size_t \fIn\fR);" 4 -.IX Item "char *str_copy(char *s, const char *t, size_t n);" -This copies the characters in string \fIt\fR into the string \fIs\fR, but never more -than \fIn\fR characters (if \fIn\fR is greater than \f(CW\*(C`0\*(C'\fR). The two involved strings -can overlap and the characters in \fIs\fR are always \f(CW\*(C`NUL\*(C'\fR\-terminated. The -string \fIs\fR has to be large enough to hold all characters to be copied. -function returns \f(CW\*(C`NULL\*(C'\fR if \fIs\fR or \fIt\fR are \f(CW\*(C`NULL\*(C'\fR. Else it returns the -pointer to the written \f(CW\*(C`NUL\*(C'\fR\-terminating character in \fIs\fR. -.Ip "char *\fBstr_dup\fR(const char *\fIs\fR, str_size_t \fIn\fR);" 4 -.IX Item "char *str_dup(const char *s, str_size_t n);" -This returns a copy of the characters in string \fIs\fR, but never more than \fIn\fR -characters if \fIn\fR is greater than \f(CW\*(C`0\*(C'\fR. It returns \f(CW\*(C`NULL\*(C'\fR if \fIs\fR is -\&\f(CW\*(C`NULL\*(C'\fR. The returned string has to be deallocated later with \fIfree\fR\|(3). -.Ip "char *\fBstr_concat\fR(char *\fIs\fR, ...);" 4 -.IX Item "char *str_concat(char *s, ...);" -This functions concatenates the characters of all string arguments into a new -allocated string and returns this new string. If \fIs\fR is \f(CW\*(C`NULL\*(C'\fR the function -returns \f(CW\*(C`NULL\*(C'\fR. Else it returns the pointer to the written final -\&\f(CW\*(C`NUL\*(C'\fR\-terminating character in \fIs\fR. The returned string later has to be -deallicated with \fIfree\fR\|(3). 
-.Ip "char *\fBstr_splice\fR(char *\fIs\fR, str_size_t \fIoff\fR, str_size_t \fIn\fR, char *\fIt\fR, str_size_t \fIm\fR);" 4 -.IX Item "char *str_splice(char *s, str_size_t off, str_size_t n, char *t, str_size_t m);" -This splices the string \fIt\fR into string \fIs\fR, i.e., the \fIn\fR characters -at offset \fIoff\fR in \fIs\fR are removed and at their location the string -\&\fIt\fR is inserted (or just the first \fIm\fR characters of \fIt\fR if \fIm\fR is -greater than \f(CW\*(C`0\*(C'\fR). It returns \f(CW\*(C`NULL\*(C'\fR if \fIs\fR or \fIt\fR are \f(CW\*(C`NULL\*(C'\fR. -Else the string \fIs\fR is returned. The function supports also the -situation where \fIt\fR is a sub-string of \fIs\fR as long as the area -\&\fIs+off\fR...\fIs+off+n\fR and \fIt\fR...\fIt+m\fR do not overlap. The caller -always has to make sure that enough room exists in \fIs\fR. -.Ip "int \fBstr_compare\fR(const char *\fIs\fR, const char *\fIt\fR, str_size_t \fIn\fR, int \fImode\fR);" 4 -.IX Item "int str_compare(const char *s, const char *t, str_size_t n, int mode);" -This performs a lexicographical comparison of the two strings \fIs\fR -and \fIt\fR (but never compares more than \fIn\fR characters of them) -and returns one of three return values: a value lower than \f(CW\*(C`0\*(C'\fR if -\&\fIs\fR is lexicographically lower than \fIt\fR, a vlue of exactly \f(CW\*(C`0\*(C'\fR -if \fIs\fR and \fIt\fR are equal and a value greater than \f(CW\*(C`0\*(C'\fR if \fIs\fR is -lexicographically higher than \fIt\fR. Per default (\fImode\fR is \f(CW\*(C`0\*(C'\fR) the -comparison is case-sensitive, but if \f(CW\*(C`STR_NOCASE\*(C'\fR is used for \fImode\fR -the comparison is done in a case-insensitive way. -.Ip "char *\fBstr_span\fR(const char *\fIs\fR, size_t \fIn\fR, const char *\fIcharset\fR, int \fImode\fR);" 4 -.IX Item "char *str_span(const char *s, size_t n, const char *charset, int mode);" -This functions spans a string \fIs\fR according to the characters specified in -\&\fIcharset\fR. 
If \fImode\fR is \f(CW\*(C`0\*(C'\fR, this means that \fIs\fR is spanned from left to -right starting at \fIs\fR (and ending either when reaching the terminating \f(CW\*(C`NUL\*(C'\fR -character or already after \fIn\fR spanned characters) as long as the characters -of \fIs\fR are contained in \fIcharset\fR. -.Sp -Alternatively one can use a \fImode\fR of \f(CW\*(C`STR_COMPLEMENT\*(C'\fR to indicate that \fIs\fR -is spanned as long as the characters of \fIs\fR are \fInot\fR contained in -\&\fIcharset\fR, i.e., \fIcharset\fR then specifies the complement of the spanning -characters. -.Sp -In both cases one can additionally \*(L"or\*(R" (with the C operator ``\f(CW\*(C`|\*(C'\fR'') -\&\f(CW\*(C`STR_RIGHT\*(C'\fR into \fImode\fR to indicate that the spanning is done right to -left starting at the terminating \f(CW\*(C`NUL\*(C'\fR character of \fIs\fR (and ending -either when reaching \fIs\fR or already after \fIn\fR spanned characters). -.Ip "char *\fBstr_locate\fR(const char *\fIs\fR, str_size_t \fIn\fR, const char *\fIt\fR);" 4 -.IX Item "char *str_locate(const char *s, str_size_t n, const char *t);" -This functions searches for the (smaller) string \fIt\fR inside (larger) string -\&\fIs\fR. If \fIn\fR is not \f(CW\*(C`0\*(C'\fR, the search is performed only inside the first \fIn\fR -characters of \fIs\fR. -.Ip "char *\fBstr_token\fR(char **\fIs\fR, const char *\fIdelim\fR, const char *\fIquote\fR, const char *\fIcomment\fR, int \fImode\fR);" 4 -.IX Item "char *str_token(char **s, const char *delim, const char *quote, const char *comment, int mode);" -This function considers the string \fIs\fR to consist of a sequence of -zero or more text tokens separated by spans of one or more characters -from the separator string \fIdelim\fR. However, text between matched pairs -of quotemarks (characters in \fIquote\fR) is treated as plain text, never -as delimiter (separator) text. 
Each call of this function returns a -pointer to the first character of the first token of \fIs\fR. The token is -\&\f(CW\*(C`NUL\*(C'\fR\-terminated, i.e., the string \fIs\fR is processed in a destructive -way. If there are quotation marks or escape sequences, the input -string is rewritten with quoted sections and escape sequences properly -interpreted. -.Sp -This function keeps track of its parsing position in the string between -separate calls by simply adjusting the callers \fIs\fR pointer, so that -subsequent calls with the same pointer variable \fIs\fR will start -processing from the position immediately after the last returned token. -In this way subsequent calls will work through the string \fIs\fR until no -tokens remain. When no token remains in \fIs\fR, \f(CW\*(C`NULL\*(C'\fR is returned. The -string of token separators (\fIdelim\fR) and the string of quote characters -(\fIquote\fR) may be changed from call to call. -.Sp -If a character in the string \fIs\fR is not quoted or escaped, and is in the -\&\fIcomment\fR set, then it is overwritten with a \f(CW\*(C`NUL\*(C'\fR character and the rest of -the string is ignored. The characters to be used as quote characters are -specified in the \fIquote\fR set, and must be used in balanced pairs. If there -is more than one flavor of quote character, one kind of quote character may be -used to quote another kind. If an unbalanced quote is found, the function -silently act as if one had been placed at the end of the input string. The -\&\fIdelim\fR and \fIquote\fR strings must be disjoint, i.e., they have to share -no characters. 
-.Sp -The \fImode\fR argument can be used to modify the processing of the string -(default for \fImode\fR is \f(CW\*(C`0\*(C'\fR): \f(CW\*(C`STR_STRIPQUOTES\*(C'\fR forces \fIquote\fR -characters to be stripped from quoted tokens; \f(CW\*(C`STR_BACKSLASHESC\*(C'\fR -enables the interpretation (and expansion) of backslash escape sequences -(`\fB\ex\fR') through \s-1ANSI-C\s0 rules; \f(CW\*(C`STR_SKIPDELIMS\*(C'\fR forces that after the -terminating \f(CW\*(C`NUL\*(C'\fR is written and the token returned, further delimiters -are skipped (this allows one to make sure that the delimiters for -one word don't become part of the next word if one change delimiters -between calls); and \f(CW\*(C`STR_TRIGRAPHS\*(C'\fR enables the recognition and -expansion of \s-1ANSI\s0 C Trigraph sequences (as a side effect this enables -\&\f(CW\*(C`STR_BACKSLASHESC\*(C'\fR, too). -.Ip "int \fBstr_parse\fR(const char *\fIs\fR, const char *\fIpop\fR, ...);" 4 -.IX Item "int str_parse(const char *s, const char *pop, ...);" -This parses the string \fIs\fR according to the parsing operation specified -by \fIpop\fR. If the parsing operation succeeds, \f(CW\*(C`TRUE\*(C'\fR is returned. Else -\&\f(CW\*(C`FALSE\*(C'\fR is returned. -.Sp -The \fIpop\fR string usually has one of the following two syntax variants: -`\fBm\fR \fIdelim\fR \fIregex\fR \fIdelim\fR \fIflags\fR*' (for matching operations) -and `\fBs\fR \fIdelim\fR \fIregex\fR \fIdelim\fR \fIsubst\fR \fIdelim\fR \fIflags\fR*' (for -substitution operations). For more details about the syntax variants -and semantic of the \fIpop\fR argument see section \fB\s-1GORY\s0 \s-1DETAILS\s0, Parsing -Specification\fR below. The syntax of the \fIregex\fR part in \fIpop\fR is -mostly equivalent to Perl 5's regular expression syntax. For the -complete and gory details see \fIperlre\fR\|(1). A brief summary you can find -under section \fB\s-1GORY\s0 \s-1DETAILS\s0, Perl Regular Expressions\fR below. 
-.Ip "int \fBstr_format\fR(char *\fIs\fR, str_size_t \fIn\fR, const char *\fIfmt\fR, ...);" 4 -.IX Item "int str_format(char *s, str_size_t n, const char *fmt, ...);" -This formats a new string according to \fIfmt\fR and optionally following -arguments and writes it into the string \fIs\fR, but never more than \fIn\fR -characters at all. It returns the number of written characters. If \fIs\fR is -\&\f(CW\*(C`NULL\*(C'\fR it just calculates the number of characters which would be written. -.Sp -The function generates the output string under the control of the \fIfmt\fR -format string that specifies how subsequent arguments (or arguments accessed -via the variable-length argument facilities of \fIstdarg\fR\|(3)) are converted for -output. -.Sp -The format string \fIfmt\fR is composed of zero or more directives: -ordinary characters (not \fB%\fR), which are copied unchanged to the output -stream; and conversion specifications, each of which results in fetching -zero or more subsequent arguments. Each conversion specification is -introduced by the character \fB%\fR. The arguments must correspond properly -(after type promotion) with the conversion specifier. Which conversion -specifications are supported are described in detail under \fB\s-1GORY\s0 -\&\s-1DETAILS\s0, Format Specification\fR below. -.Ip "unsigned long \fBstr_hash\fR(const char *\fIs\fR, str_size_t \fIn\fR, int \fImode\fR);" 4 -.IX Item "unsigned long str_hash(const char *s, str_size_t n, int mode);" -This function calculates a hash value of string \fIs\fR (or of its first \fIn\fR -characters if \fIn\fR is equal to \f(CW\*(C`0\*(C'\fR). The following hashing functions -are supported and can be selected with \fImode\fR: \s-1STR_HASH_DJBX33\s0 (Daniel -J. Berstein, Times 33 Hash with Addition), \s-1STR_HASH_BJDDJ\s0 (Bob -Jenkins, Dr. Dobbs Journal), and \s-1STR_HASH_MACRC32\s0 (Mark Adler, Cyclic -Redundancy Check with 32\-Bit). 
This function is intended for fast use -in hashing algorithms and \fInot\fR for use as cryptographically strong -message digests. -.Ip "int \fBstr_base64\fR(char *\fIs\fR, str_size_t \fIn\fR, unsigned char *\fIucp\fR, str_size_t \fIucn\fR, int \fImode\fR);" 4 -.IX Item "int str_base64(char *s, str_size_t n, unsigned char *ucp, str_size_t ucn, int mode);" -This function Base64 encodes \fIucn\fR bytes starting at \fIucp\fR and writes -the resulting string into \fIs\fR (but never more than \fIn\fR characters are -written). The \fImode\fR for this operation has to be \f(CW\*(C`STR_BASE64_ENCODE\*(C'\fR. -Additionally one can \s-1OR\s0 the value \f(CW\*(C`STR_BASE64_STRICT\*(C'\fR to enable strict -encoding where after every 72th output character a newline character is -inserted. The function returns the number of output characters written. -If \fIs\fR is \f(CW\*(C`NULL\*(C'\fR the function just calculates the number of required -output characters. -.Sp -Alternatively, if \fImode\fR is \f(CW\*(C`STR_BASE64_DECODE\*(C'\fR the string \fIs\fR (or -the first \fIn\fR characters only if \fIn\fR is not \f(CW\*(C`0\*(C'\fR) is decoded and the -output bytes written at \fIucp\fR. Again, if \fIucp\fR is \f(CW\*(C`NULL\*(C'\fR only the -number of required output bytes are calculated. -.SH "GORY DETAILS" -.IX Header "GORY DETAILS" -In this part of the documentation more complex topics are documented in -detail. -.Sh "Perl Regular Expressions" -.IX Subsection "Perl Regular Expressions" -The regular expressions used in \fBStr\fR are more or less Perl compatible -(they are provided by a stripped down and built-in version of the -\&\fI\s-1PCRE\s0\fR library). So the syntax description in \fIperlre\fR\|(1) applies -and don't has to be repeated here again. For a deeper understanding -and details you should have a look at the book `\fIMastering Regular -Expressions\fR' (see also the \fIperlbook\fR\|(1) manpage) by \fIJeffrey Friedl\fR. 
-For convinience reasons we give you only a brief summary of Perl -compatible regular expressions: -.PP -The following metacharacters have their standard \fIegrep\fR\|(1) meanings: -.PP -.Vb 7 -\& \e Quote the next metacharacter -\& ^ Match the beginning of the line -\& . Match any character (except newline) -\& $ Match the end of the line (or before newline at the end) -\& | Alternation -\& () Grouping -\& [] Character class -.Ve -The following standard quantifiers are recognized: -.PP -.Vb 12 -\& * Match 0 or more times (greedy) -\& *? Match 0 or more times (non greedy) -\& + Match 1 or more times (greedy) -\& +? Match 1 or more times (non greedy) -\& ? Match 1 or 0 times (greedy) -\& ?? Match 1 or 0 times (non greedy) -\& {n} Match exactly n times (greedy) -\& {n}? Match exactly n times (non greedy) -\& {n,} Match at least n times (greedy) -\& {n,}? Match at least n times (non greedy) -\& {n,m} Match at least n but not more than m times (greedy) -\& {n,m}? Match at least n but not more than m times (non greedy) -.Ve -The following backslash sequences are recognized: -.PP -.Vb 15 -\& \et Tab (HT, TAB) -\& \en Newline (LF, NL) -\& \er Return (CR) -\& \ef Form feed (FF) -\& \ea Alarm (bell) (BEL) -\& \ee Escape (think troff) (ESC) -\& \e033 Octal char -\& \ex1B Hex char -\& \ec[ Control char -\& \el Lowercase next char -\& \eu Uppercase next char -\& \eL Lowercase till \eE -\& \eU Uppercase till \eE -\& \eE End case modification -\& \eQ Quote (disable) pattern metacharacters till \eE -.Ve -The following non zero-width assertions are recognized: -.PP -.Vb 6 -\& \ew Match a "word" character (alphanumeric plus "_") -\& \eW Match a non-word character -\& \es Match a whitespace character -\& \eS Match a non-whitespace character -\& \ed Match a digit character -\& \eD Match a non-digit character -.Ve -The following zero-width assertions are recognized: -.PP -.Vb 6 -\& \eb Match a word boundary -\& \eB Match a non-(word boundary) -\& \eA Match only at beginning of string 
-\& \eZ Match only at end of string, or before newline at the end -\& \ez Match only at end of string -\& \eG Match only where previous m//g left off (works only with /g) -.Ve -The following regular expression extensions are recognized: -.PP -.Vb 11 -\& (?#text) An embedded comment -\& (?:pattern) This is for clustering, not capturing (simple) -\& (?imsx-imsx:pattern) This is for clustering, not capturing (full) -\& (?=pattern) A zero-width positive lookahead assertion -\& (?!pattern) A zero-width negative lookahead assertion -\& (?<=pattern) A zero-width positive lookbehind assertion -\& (?<!pattern) A zero-width negative lookbehind assertion -\& (?>pattern) An "independent" subexpression -\& (?(cond)yes-re) Conditional expression (simple) -\& (?(cond)yes-re|no-re) Conditional expression (full) -\& (?imsx-imsx) One or more embedded pattern-match modifiers -.Ve -.Sh "Parsing Specification" -.IX Subsection "Parsing Specification" -The \fBstr_parse\fR(const char *\fIs\fR, const char *\fIpop\fR, ...) function -is a very flexible but complex one. The argument \fIs\fR is the string on -which the parsing operation specified by argument \fIpop\fR is applied. -The parsing semantics are highly influenced by Perl's `\fB=~\fR' matching -operator, because one of the main goals of \fIstr_parse\fR\|(3) is to allow one -to rewrite typical Perl matching constructs into C. -.PP -Now to the gory details. In general, the \fIpop\fR argument of \fIstr_parse\fR\|(3) -has one of the following two syntax variants: -.Ip "\fBMatching:\fR `\fBm\fR \fIdelim\fR \fIregex\fR \fIdelim\fR \fIflags\fR*':" 4 -.IX Item "Matching: `m delim regex delim flags*':" -This matches \fIs\fR against the Perl-style regular expression \fIregex\fR -under the control of zero or more \fIflags\fR which control the parsing -semantics. The stripped down \fIpop\fR syntax `\fIregex\fR' is equivalent to -`\fBm/\fR\fIregex\fR\fB/\fR'. -.Sp -For each grouping pair of parenthesis in \fIregex\fR, the text in \fIs\fR -which was grouped by the parenthesis is extracted into new strings. 
-These per default are allocated as seperate strings and returned to the -caller through following `\fBchar **\fR' arguments. The caller is required -to \fIfree\fR\|(3) them later. -.Ip "\fBSubstitution:\fR `\fBs\fR \fIdelim\fR \fIregex\fR \fIdelim\fR \fIsubst\fR \fIdelim\fR \fIflags\fR*':" 4 -.IX Item "Substitution: `s delim regex delim subst delim flags*':" -This matches \fIs\fR against the Perl-style regular expression \fIregex\fR -under the control of zero or more \fIflags\fR which control the parsing -semantics. As a result of the operation, a new string formed which -consists of \fIs\fR but with the part which matched \fIregex\fR replaced by -\&\fIsubst\fR. The result string is returned to the caller through a `\fBchar -**\fR' argument. The caller is required to \fIfree\fR\|(3) this later. -.Sp -For each grouping pair of parenthesis in \fIregex\fR, the text in \fIs\fR -which was grouped by the parenthesis is extracted into new strings -and can be referenced for expansion via `\fB$n\fR' (n=1,..) in \fIsubst\fR. -Additionally any \fIstr_format\fR\|(3) style `\fB%\fR' constructs in \fIsubst\fR are -expanded through additional caller supplied arguments. -.PP -The following \fIflags\fR are supported: -.Ip "\fBb\fR" 4 -.IX Item "b" -If the \fIbundle\fR flag `\fBb\fR' is specified, the extracted strings are -bundled together into a single chunk of memory and its address is -returned to the caller with a additional `\fBchar **\fR' argument which has -to preceed the regular string arguments. The caller then has to \fIfree\fR\|(3) -only this chunk of memory in order to free all extracted strings at -once. -.Ip "\fBi\fR" 4 -.IX Item "i" -If the case-\fIinsensitive\fR flag `\fBi\fR' is specified, \fIregex\fR -is matched in case-insensitive way. 
-.Ip "\fBo\fR" 4 -.IX Item "o" -If the \fIonce\fR flag `\fBo\fR' is specified, this indicates to the \fBStr\fR -library that the whole \fIpop\fR string is constant and that its internal -pre-processing (it is compiled into a deterministic finite automaton -(\s-1DFA\s0) internally) has to be done only once (the \fBStr\fR library then -caches the \s-1DFA\s0 which corresponds to the \fIpop\fR argument). -.Ip "\fBx\fR" 4 -.IX Item "x" -If the \fIextended\fR flag `\fBx\fR' is specified, the \fIregex\fR's legibility -is extended by permitting embedded whitespace and comments to allow one -to write down complex regular expressions more cleary and even in a -documented way. -.Ip "\fBm\fR" 4 -.IX Item "m" -If the \fImultiple\fR lines flag `\fBm\fR' is specified, the string \fIs\fR is -treated as multiple lines. That is, this changes the regular expression -meta characters `\fB^\fR' and `\fB$\fR' from matching at only the very start -or end of the string \fIs\fR to the start or end of any line anywhere -within the string \fIs\fR. -.Ip "\fBs\fR" 4 -.IX Item "s" -If the \fIsingle\fR line flag `\fBs\fR' is specified, the string \fIs\fR is -treated as single line. That is, this changes the regular expression -meta character `\fB.\fR' to match any character whatsoever, even a newline, -which it normally would not match. -.SH "CONVERSION SPECIFICATION" -.IX Header "CONVERSION SPECIFICATION" -In the format string of \fIstr_format\fR\|(3) each conversion specification is -introduced by the character \fB%\fR. After the \fB%\fR, the following appear -in sequence: -.Ip "o" 4 -An optional field, consisting of a decimal digit string followed by a \fB$\fR, -specifying the next argument to access. If this field is not provided, the -argument following the last argument accessed will be used. Arguments are -numbered starting at \fB1\fR. If unaccessed arguments in the format string are -interspersed with ones that are accessed the results will be indeterminate. 
-.Ip "o" 4 -Zero or more of the following flags: -.Sp -A \fB#\fR character specifying that the value should be converted to an -``alternate form''. For \fBc\fR, \fBd\fR, \fBi\fR, \fBn\fR, \fBp\fR, \fBs\fR, and \fBu\fR, -conversions, this option has no effect. For \fBo\fR conversions, the precision -of the number is increased to force the first character of the output string -to a zero (except if a zero value is printed with an explicit precision of -zero). For \fBx\fR and \fBX\fR conversions, a non-zero result has the string \fB0x\fR -(or \fB0X\fR for \fBX\fR conversions) prepended to it. For \fBe\fR, \fBE\fR, \fBf\fR, \fBg\fR, -and \fBG\fR, conversions, the result will always contain a decimal point, even if -no digits follow it (normally, a decimal point appears in the results of those -conversions only if a digit follows). For \fBg\fR and \fBG\fR conversions, trailing -zeros are not removed from the result as they would otherwise be. -.Sp -A zero `\fB0\fR' character specifying zero padding. For all conversions except -\&\fBn\fR, the converted value is padded on the left with zeros rather than blanks. -If a precision is given with a numeric conversion (\fBd\fR, \fBi\fR, \fBo\fR, \fBu\fR, -\&\fBi\fR, \fBx\fR, and \fBX\fR), the `\fB0\fR' flag is ignored. -.Sp -A negative field width flag `\fB-\fR' indicates the converted value is to be left -adjusted on the field boundary. Except for \fBn\fR conversions, the converted -value is padded on the right with blanks, rather than on the left with blanks -or zeros. A `\fB-\fR' overrides a `\fB0\fR' if both are given. -.Sp -A space, specifying that a blank should be left before a positive number -produced by a signed conversion (\fBd\fR, \fBe\fR, \fBE\fR, \fBf\fR, \fBg\fR, \fBG\fR, or \fBi\fR). -.Sp -A `\fB+\fR' character specifying that a sign always be placed before a number -produced by a signed conversion. A `\fB+\fR' overrides a space if both are used. 
-.Ip "o" 4 -An optional decimal digit string specifying a minimum field width. -If the converted value has fewer characters than the field width, it will -be padded with spaces on the left (or right, if the left-adjustment -flag has been given) to fill out -the field width. -.Ip "o" 4 -An optional precision, in the form of a period `\fB.\fR' followed by an -optional digit string. If the digit string is omitted, the precision is -taken as zero. This gives the minimum number of digits to appear for -\&\fBd\fR, \fBi\fR, \fBo\fR, \fBu\fR, \fBx\fR, and \fBX\fR conversions, the number of digits -to appear after the decimal-point for \fBe\fR, \fBE\fR, and \fBf\fR conversions, -the maximum number of significant digits for \fBg\fR and \fBG\fR conversions, -or the maximum number of characters to be printed from a string for \fBs\fR -conversions. -.Ip "o" 4 -The optional character \fBh\fR, specifying that a following \fBd\fR, \fBi\fR, \fBo\fR, -\&\fBu\fR, \fBx\fR, or \fBX\fR conversion corresponds to a `\f(CW\*(C`short int\*(C'\fR' or `\f(CW\*(C`unsigned -short int\*(C'\fR' argument, or that a following \fBn\fR conversion corresponds to a -pointer to a `\f(CW\*(C`short int\*(C'\fR argument. -.Ip "o" 4 -The optional character \fBl\fR (ell) specifying that a following \fBd\fR, \fBi\fR, -\&\fBo\fR, \fBu\fR, \fBx\fR, or \fBX\fR conversion applies to a pointer to a `\f(CW\*(C`long int\*(C'\fR' -or `\f(CW\*(C`unsigned long int\*(C'\fR' argument, or that a following \fBn\fR conversion -corresponds to a pointer to a `\f(CW\*(C`long int\*(C'\fR argument. -.Ip "o" 4 -The optional character \fBq\fR, specifying that a following \fBd\fR, \fBi\fR, \fBo\fR, -\&\fBu\fR, \fBx\fR, or \fBX\fR conversion corresponds to a `\f(CW\*(C`quad int\*(C'\fR' or `\f(CW\*(C`unsigned -quad int\*(C'\fR' argument, or that a following \fBn\fR conversion corresponds to a -pointer to a `\f(CW\*(C`quad int\*(C'\fR' argument. 
-.Ip "o" 4 -The character \fBL\fR specifying that a following \fBe\fR, \fBE\fR, \fBf\fR, \fBg\fR, or \fBG\fR -conversion corresponds to a `\f(CW\*(C`long double\*(C'\fR' argument. -.Ip "o" 4 -A character that specifies the type of conversion to be applied. -.PP -A field width or precision, or both, may be indicated by an asterisk `\fB*\fR' or -an asterisk followed by one or more decimal digits and a `\fB$\fR' instead of a -digit string. In this case, an `\f(CW\*(C`int\*(C'\fR' argument supplies the field width or -precision. A negative field width is treated as a left adjustment flag -followed by a positive field width; a negative precision is treated as though -it were missing. If a single format directive mixes positional (`\fBnn$\fR') and -non-positional arguments, the results are undefined. -.PP -The conversion specifiers and their meanings are: -.Ip "\fBdiouxX\fR" 4 -.IX Item "diouxX" -The `\f(CW\*(C`int\*(C'\fR' (or appropriate variant) argument is converted to signed decimal -(\fBd\fR and \fBi\fR), unsigned octal (\fBo\fR), unsigned decimal (\fBu\fR), or unsigned -hexadecimal (\fBx\fR and \fBX\fR) notation. The letters \fBabcdef\fR are used for \fBx\fR -conversions; the letters \fB\s-1ABCDEF\s0\fR are used for \fBX\fR conversions. The -precision, if any, gives the minimum number of digits that must appear; if the -converted value requires fewer digits, it is padded on the left with zeros. -.Ip "\fB\s-1DOU\s0\fR" 4 -.IX Item "DOU" -The `\f(CW\*(C`long int\*(C'\fR argument is converted to signed decimal, unsigned octal, or -unsigned decimal, as if the format had been \fBld\fR, \fBlo\fR, or \fBlu\fR -respectively. These conversion characters are deprecated, and will eventually -disappear. 
-.Ip "\fBeE\fR" 4 -.IX Item "eE" -The `\f(CW\*(C`double\*(C'\fR' argument is rounded and converted in the style -`[\-]d.ddd\fBe\fR+\-dd' where there is one digit before the decimal-point character -and the number of digits after it is equal to the precision; if the precision -is missing, it is taken as 6; if the precision is zero, no decimal-point -character appears. An \fBE\fR conversion uses the letter \fBE\fR (rather than \fBe\fR) -to introduce the exponent. The exponent always contains at least two digits; -if the value is zero, the exponent is 00. -.Ip "\fBf\fR" 4 -.IX Item "f" -The `\f(CW\*(C`double\*(C'\fR' argument is rounded and converted to decimal notation in the -style `[\-]ddd.ddd>' where the number of digits after the decimal-point -character is equal to the precision specification. If the precision is -missing, it is taken as 6; if the precision is explicitly zero, no -decimal-point character appears. If a decimal point appears, at least one -digit appears before it. -.Ip "\fBg\fR" 4 -.IX Item "g" -The `\f(CW\*(C`double\*(C'\fR' argument is converted in style \fBf\fR or \fBe\fR (or \fBE\fR for \fBG\fR -conversions). The precision specifies the number of significant digits. If -the precision is missing, 6 digits are given; if the precision is zero, it is -treated as 1. Style \fBe\fR is used if the exponent from its conversion is less -than \-4 or greater than or equal to the precision. Trailing zeros are removed -from the fractional part of the result; a decimal point appears only if it is -followed by at least one digit. -.Ip "\fBc\fR" 4 -.IX Item "c" -The `\f(CW\*(C`int\*(C'\fR' argument is converted to an `\f(CW\*(C`unsigned char\*(C'\fR, and the resulting -character is written. -.Ip "\fBs\fR" 4 -.IX Item "s" -The `\f(CW\*(C`char *\*(C'\fR' argument is expected to be a pointer to an array of character -type (pointer to a string). 
Characters from the array are written up to (but -not including) a terminating \f(CW\*(C`NUL\*(C'\fR character; if a precision is specified, no -more than the number specified are written. If a precision is given, no null -character need be present; if the precision is not specified, or is greater -than the size of the array, the array must contain a terminating \f(CW\*(C`NUL\*(C'\fR -character. -.Ip "\fBp\fR" 4 -.IX Item "p" -The `\f(CW\*(C`void *\*(C'\fR pointer argument is printed in hexadecimal (as if by `\fB%#x\fR' -or `\f(CW\*(C`%#lx\*(C'\fR). -.Ip "\fBn\fR" 4 -.IX Item "n" -The number of characters written so far is stored into the integer indicated -by the `\f(CW\*(C`int *\*(C'\fR' (or variant) pointer argument. No argument is converted. -.Ip "\fB%\fR" 4 -.IX Item "%" -A `\fB%\fR' is written. No argument is converted. The complete conversion -specification is `\fB%%\fR. -.PP -In no case does a non-existent or small field width cause truncation of a -field; if the result of a conversion is wider than the field width, the field -is expanded to contain the conversion result. -.SH "EXAMPLES" -.IX Header "EXAMPLES" -In the following a few snippets of selected use cases of \fBStr\fR are -presented: -.Ip "\fBSplice a String into Another\fR" 4 -.IX Item "Splice a String into Another" -.Vb 5 -\& char *v1 = "foo bar quux"; -\& char *v2 = "baz"; -\& str_splice(v1, 3, 5, v2, 0): -\& /* now we have v1 = "foobazquux" */ -\& .... -.Ve -.Ip "\fBTokenize a String\fR" 4 -.IX Item "Tokenize a String" -.Vb 10 -\& char *var = " foo \et " bar 'baz'" q'uu'x #comment"; -\& char *tok, *p; -\& p = var; -\& while ((tok = str_token(p, ":", "\e"'", "#", 0)) != NULL) { -\& /* here we enter three times: -\& 1. tok = "foo" -\& 2. tok = " bar 'baz'" -\& 3. tok = "quux" */ -\& ... -\& } -.Ve -.Ip "\fBMatch a String\fR" 4 -.IX Item "Match a String" -.Vb 5 -\& char *var = "foo:bar"; -\& if (str_parse(var, "^.+?:.+$/)) { -\& /* var matched */ -\& ... 
-\& } -.Ve -.Ip "\fBMatch a String and Go Ahead with Details\fR" 4 -.IX Item "Match a String and Go Ahead with Details" -.Vb 10 -\& char *var = "foo:bar"; -\& char *cp, *v1, *v2; -\& if (str_parse(var, "m/^(.+?):(.+)$/b", &cp, &v1, &v2)) { -\& ... -\& /* now we have: -\& cp = "foo\e0bar\e0" and v1 and v2 pointing -\& into it, i.e., v1 = "foo", v2 = "bar" */ -\& ... -\& free(cp); -\& } -.Ve -.Ip "\fBSubstitute Text in a String\fR" 4 -.IX Item "Substitute Text in a String" -.Vb 8 -\& char *var = "foo:bar"; -\& char *subst = "quux"; -\& char *new; -\& str_parse(var, "s/^(.+?):(.+)$/$1-%s-$2/", &new, subst); -\& ... -\& /* now we have: var = "foo:bar", new = "foo:quux:bar" */ -\& ... -\& free(new); -.Ve -.Ip "\fBFormat a String\fR" 4 -.IX Item "Format a String" -.Vb 6 -\& char *v0 = "abc..."; /* length not guessable */ -\& char *v1 = "foo"; -\& void *v2 = 0xDEAD; -\& int v3 = 42; -\& char *cp; -\& int n; -.Ve -.Vb 6 -\& n = str_format(NULL, 0, "%s|%5s-%x-%04d", v0, v1, v2, v3); -\& cp = malloc(n); -\& str_format(cp, n, "%s-%x-%04d", v1, v2, v3); -\& /* now we have cp = "abc...| foo-DEAD-0042" */ -\& ... -\& free(cp); -.Ve -.SH "SEE ALSO" -.IX Header "SEE ALSO" -\&\fIstring\fR\|(3), \fIprintf\fR\|(3), \fIperlre\fR\|(1). -.SH "HISTORY" -.IX Header "HISTORY" -The \fBStr\fR library was written in November and December 1999 by Ralf -S. Engelschall. As building blocks various existing code was used and -recycled: for the \fIstr_token\fR\|(3) implementation an anchient \fIstrtok\fR\|(3) -flavor from William Deich 1991 was cleaned up and adjusted. As the -background parsing engine for \fIstr_parse\fR\|(3) a heavily stripped down -version of Philip Hazel's \s-1PCRE\s0 2.08 library was used. The \fIstr_format\fR\|(3) -implementation was based on Panos Tsirigotis' \fIsprintf\fR\|(3) code as -adjusted by the Apache Software Foundation 1998. 
The formatting engine -was stripped down and enhanced to support internal extensions which were -required by \fIstr_format\fR\|(3) and \fIstr_parse\fR\|(3). -.SH "AUTHOR" -.IX Header "AUTHOR" -.Vb 3 -\& Ralf S. Engelschall -\& rse@engelschall.com -\& www.engelschall.com -.Ve Index: ossp-pkg/l2/l2.pod RCS File: /v/ossp/cvs/ossp-pkg/l2/l2.pod,v rcsdiff -q -kk '-r1.1' '-r1.2' -u '/v/ossp/cvs/ossp-pkg/l2/l2.pod,v' 2>/dev/null --- l2.pod 2001/05/10 19:46:01 1.1 +++ l2.pod 2001/05/10 20:00:31 1.2 @@ -1,9 +1,10 @@ ## -## Str - String Library -## Copyright (c) 1999-2000 Ralf S. Engelschall +## L2 - OSSP Logging Library +## Copyright (c) 2001 The OSSP Project (http://www.ossp.org/) +## Copyright (c) 2001 Cable & Wireless Deutschland (http://www.cw.com/de/) ## -## This file is part of Str, a string handling and manipulation -## library which can be found at http://www.engelschall.com/sw/str/. +## This file is part of OSSP L2, a flexible logging library which +## can be found at http://www.ossp.org/pkg/l2/. ## ## Permission to use, copy, modify, and distribute this software for ## any purpose with or without fee is hereby granted, provided that @@ -22,750 +23,31 @@ ## OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT ## OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF ## SUCH DAMAGE. +## +## l2.pod: Unix manual page ## -## str.pod: Unix manual page -## - -# Parts of this manual page (the str_format description) is: -# -# Copyright (c) 1990, 1991, 1993 -# The Regents of the University of California. All rights reserved. -# -# This code is derived from software contributed to Berkeley by -# Chris Torek and the American National Standards Committee X3, -# on Information Processing Systems. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# 1.
Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# 2. Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# 3. All advertising materials mentioning features or use of this software -# must display the following acknowledgement: -# This product includes software developed by the University of -# California, Berkeley and its contributors. -# 4. Neither the name of the University nor the names of its contributors -# may be used to endorse or promote products derived from this software -# without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS -# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) -# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT -# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY -# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF -# SUCH DAMAGE. =pod =head1 NAME -B - String Library +B - Logging Library =head1 VERSION -Str STR_VERSION_STR +L2 L2_VERSION_STR =head1 SYNOPSIS -B, -B, -B, -B, -B, -B, -B, -B, -B, -B, -B, -B, -B. +... =head1 DESCRIPTION -The B library is a generic string library written in ANSI C which -provides functions for handling, matching, parsing, searching and -formatting of C strings. 
So it can be considered as a superset of POSIX -string(3), but its main intention is to provide a more convinient and -compact API plus a more generalized functionality. +The B library is... =head1 FUNCTIONS The following functions are provided by the B API: -=over 4 - -=item str_size_t B(const char *I); - -This function determines the length of string I, i.e., the number -of characters starting at I that precede the terminating C -character. It returns C if I is C. - -=item char *B(char *I, const char *I, size_t I); - -This copies the characters in string I into the string I, but never more -than I characters (if I is greater than C<0>). The two involved strings -can overlap and the characters in I are always C-terminated. The -string I has to be large enough to hold all characters to be copied. -function returns C if I or I are C. Else it returns the -pointer to the written C-terminating character in I. - -=item char *B(const char *I, str_size_t I); - -This returns a copy of the characters in string I, but never more than I -characters if I is greater than C<0>. It returns C if I is -C. The returned string has to be deallocated later with free(3). - -=item char *B(char *I, ...); - -This functions concatenates the characters of all string arguments into a new -allocated string and returns this new string. If I is C the function -returns C. Else it returns the pointer to the written final -C-terminating character in I. The returned string later has to be -deallicated with free(3). - -=item char *B(char *I, str_size_t I, str_size_t I, char *I, str_size_t I); - -This splices the string I into string I, i.e., the I characters -at offset I in I are removed and at their location the string -I is inserted (or just the first I characters of I if I is -greater than C<0>). It returns C if I or I are C. -Else the string I is returned. The function supports also the -situation where I is a sub-string of I as long as the area -I...I and I...I do not overlap. 
The caller -always has to make sure that enough room exists in I. - -=item int B(const char *I, const char *I, str_size_t I, int I); - -This performs a lexicographical comparison of the two strings I -and I (but never compares more than I characters of them) -and returns one of three return values: a value lower than C<0> if -I is lexicographically lower than I, a vlue of exactly C<0> -if I and I are equal and a value greater than C<0> if I is -lexicographically higher than I. Per default (I is C<0>) the -comparison is case-sensitive, but if C is used for I -the comparison is done in a case-insensitive way. - -=item char *B(const char *I, size_t I, const char *I, int I); - -This functions spans a string I according to the characters specified in -I. If I is C<0>, this means that I is spanned from left to -right starting at I (and ending either when reaching the terminating C -character or already after I spanned characters) as long as the characters -of I are contained in I. - -Alternatively one can use a I of C to indicate that I -is spanned as long as the characters of I are I contained in -I, i.e., I then specifies the complement of the spanning -characters. - -In both cases one can additionally "or" (with the C operator ``C<|>'') -C into I to indicate that the spanning is done right to -left starting at the terminating C character of I (and ending -either when reaching I or already after I spanned characters). - -=item char *B(const char *I, str_size_t I, const char *I); - -This functions searches for the (smaller) string I inside (larger) string -I. If I is not C<0>, the search is performed only inside the first I -characters of I. - -=item char *B(char **I, const char *I, const char *I, const char *I, int I); - -This function considers the string I to consist of a sequence of -zero or more text tokens separated by spans of one or more characters -from the separator string I. 
However, text between matched pairs -of quotemarks (characters in I) is treated as plain text, never -as delimiter (separator) text. Each call of this function returns a -pointer to the first character of the first token of I. The token is -C-terminated, i.e., the string I is processed in a destructive -way. If there are quotation marks or escape sequences, the input -string is rewritten with quoted sections and escape sequences properly -interpreted. - -This function keeps track of its parsing position in the string between -separate calls by simply adjusting the callers I pointer, so that -subsequent calls with the same pointer variable I will start -processing from the position immediately after the last returned token. -In this way subsequent calls will work through the string I until no -tokens remain. When no token remains in I, C is returned. The -string of token separators (I) and the string of quote characters -(I) may be changed from call to call. - -If a character in the string I is not quoted or escaped, and is in the -I set, then it is overwritten with a C character and the rest of -the string is ignored. The characters to be used as quote characters are -specified in the I set, and must be used in balanced pairs. If there -is more than one flavor of quote character, one kind of quote character may be -used to quote another kind. If an unbalanced quote is found, the function -silently act as if one had been placed at the end of the input string. The -I and I strings must be disjoint, i.e., they have to share -no characters. 
- -The I argument can be used to modify the processing of the string -(default for I is C<0>): C forces I -characters to be stripped from quoted tokens; C -enables the interpretation (and expansion) of backslash escape sequences -(`B<\x>') through ANSI-C rules; C forces that after the -terminating C is written and the token returned, further delimiters -are skipped (this allows one to make sure that the delimiters for -one word don't become part of the next word if one change delimiters -between calls); and C enables the recognition and -expansion of ANSI C Trigraph sequences (as a side effect this enables -C, too). - -=item int B(const char *I, const char *I, ...); - -This parses the string I according to the parsing operation specified -by I. If the parsing operation succeeds, C is returned. Else -C is returned. - -The I string usually has one of the following two syntax variants: -`B I I I I*' (for matching operations) -and `B I I I I I I*' (for -substitution operations). For more details about the syntax variants -and semantic of the I argument see section B below. The syntax of the I part in I is -mostly equivalent to Perl 5's regular expression syntax. For the -complete and gory details see perlre(1). A brief summary you can find -under section B below. - -=item int B(char *I, str_size_t I, const char *I, ...); - -This formats a new string according to I and optionally following -arguments and writes it into the string I, but never more than I -characters at all. It returns the number of written characters. If I is -C it just calculates the number of characters which would be written. - -The function generates the output string under the control of the I -format string that specifies how subsequent arguments (or arguments accessed -via the variable-length argument facilities of stdarg(3)) are converted for -output. 
- -The format string I is composed of zero or more directives: -ordinary characters (not B<%>), which are copied unchanged to the output -stream; and conversion specifications, each of which results in fetching -zero or more subsequent arguments. Each conversion specification is -introduced by the character B<%>. The arguments must correspond properly -(after type promotion) with the conversion specifier. Which conversion -specifications are supported are described in detail under B below. - -=item unsigned long B(const char *I, str_size_t I, int I); - -This function calculates a hash value of string I (or of its first I -characters if I is equal to C<0>). The following hashing functions -are supported and can be selected with I: STR_HASH_DJBX33 (Daniel -J. Berstein, Times 33 Hash with Addition), STR_HASH_BJDDJ (Bob -Jenkins, Dr. Dobbs Journal), and STR_HASH_MACRC32 (Mark Adler, Cyclic -Redundancy Check with 32-Bit). This function is intended for fast use -in hashing algorithms and I for use as cryptographically strong -message digests. - -=item int B(char *I, str_size_t I, unsigned char *I, str_size_t I, int I); - -This function Base64 encodes I bytes starting at I and writes -the resulting string into I (but never more than I characters are -written). The I for this operation has to be C. -Additionally one can OR the value C to enable strict -encoding where after every 72th output character a newline character is -inserted. The function returns the number of output characters written. -If I is C the function just calculates the number of required -output characters. - -Alternatively, if I is C the string I (or -the first I characters only if I is not C<0>) is decoded and the -output bytes written at I. Again, if I is C only the -number of required output bytes are calculated. - -=back - -=head1 GORY DETAILS - -In this part of the documentation more complex topics are documented in -detail. 
- -=head2 Perl Regular Expressions - -The regular expressions used in B are more or less Perl compatible -(they are provided by a stripped down and built-in version of the -I library). So the syntax description in perlre(1) applies -and don't has to be repeated here again. For a deeper understanding -and details you should have a look at the book `I' (see also the perlbook(1) manpage) by I. -For convinience reasons we give you only a brief summary of Perl -compatible regular expressions: - -The following metacharacters have their standard egrep(1) meanings: - - \ Quote the next metacharacter - ^ Match the beginning of the line - . Match any character (except newline) - $ Match the end of the line (or before newline at the end) - | Alternation - () Grouping - [] Character class - -The following standard quantifiers are recognized: - - * Match 0 or more times (greedy) - *? Match 0 or more times (non greedy) - + Match 1 or more times (greedy) - +? Match 1 or more times (non greedy) - ? Match 1 or 0 times (greedy) - ?? Match 1 or 0 times (non greedy) - {n} Match exactly n times (greedy) - {n}? Match exactly n times (non greedy) - {n,} Match at least n times (greedy) - {n,}? Match at least n times (non greedy) - {n,m} Match at least n but not more than m times (greedy) - {n,m}? 
Match at least n but not more than m times (non greedy) - -The following backslash sequences are recognized: - - \t Tab (HT, TAB) - \n Newline (LF, NL) - \r Return (CR) - \f Form feed (FF) - \a Alarm (bell) (BEL) - \e Escape (think troff) (ESC) - \033 Octal char - \x1B Hex char - \c[ Control char - \l Lowercase next char - \u Uppercase next char - \L Lowercase till \E - \U Uppercase till \E - \E End case modification - \Q Quote (disable) pattern metacharacters till \E - -The following non zero-width assertions are recognized: - - \w Match a "word" character (alphanumeric plus "_") - \W Match a non-word character - \s Match a whitespace character - \S Match a non-whitespace character - \d Match a digit character - \D Match a non-digit character - -The following zero-width assertions are recognized: - - \b Match a word boundary - \B Match a non-(word boundary) - \A Match only at beginning of string - \Z Match only at end of string, or before newline at the end - \z Match only at end of string - \G Match only where previous m//g left off (works only with /g) - -The following regular expression extensions are recognized: - - (?#text) An embedded comment - (?:pattern) This is for clustering, not capturing (simple) - (?imsx-imsx:pattern) This is for clustering, not capturing (full) - (?=pattern) A zero-width positive lookahead assertion - (?!pattern) A zero-width negative lookahead assertion - (?<=pattern) A zero-width positive lookbehind assertion - (?pattern) An "independent" subexpression - (?(cond)yes-re) Conditional expression (simple) - (?(cond)yes-re|no-re) Conditional expression (full) - (?imsx-imsx) One or more embedded pattern-match modifiers - -=head2 Parsing Specification - -The B(const char *I, const char *I, ...) function -is a very flexible but complex one. The argument I is the string on -which the parsing operation specified by argument I is applied. 
-The parsing semantics are highly influenced by Perl's `B<=~>' matching -operator, because one of the main goals of str_parse(3) is to allow one -to rewrite typical Perl matching constructs into C. - -Now to the gory details. In general, the I argument of str_parse(3) -has one of the following two syntax variants: - -=over 4 - -=item B `B I I I I*': - -This matches I against the Perl-style regular expression I -under the control of zero or more I which control the parsing -semantics. The stripped down I syntax `I' is equivalent to -`BIB'. - -For each grouping pair of parenthesis in I, the text in I -which was grouped by the parenthesis is extracted into new strings. -These per default are allocated as seperate strings and returned to the -caller through following `B' arguments. The caller is required -to free(3) them later. - -=item B `B I I I I I I*': - -This matches I against the Perl-style regular expression I -under the control of zero or more I which control the parsing -semantics. As a result of the operation, a new string formed which -consists of I but with the part which matched I replaced by -I. The result string is returned to the caller through a `B' argument. The caller is required to free(3) this later. - -For each grouping pair of parenthesis in I, the text in I -which was grouped by the parenthesis is extracted into new strings -and can be referenced for expansion via `B<$n>' (n=1,..) in I. -Additionally any str_format(3) style `B<%>' constructs in I are -expanded through additional caller supplied arguments. - -=back - -The following I are supported: - -=over 4 - -=item B - -If the I flag `B' is specified, the extracted strings are -bundled together into a single chunk of memory and its address is -returned to the caller with a additional `B' argument which has -to preceed the regular string arguments. The caller then has to free(3) -only this chunk of memory in order to free all extracted strings at -once. 
- -=item B - -If the case-I flag `B' is specified, I -is matched in case-insensitive way. - -=item B - -If the I flag `B' is specified, this indicates to the B -library that the whole I string is constant and that its internal -pre-processing (it is compiled into a deterministic finite automaton -(DFA) internally) has to be done only once (the B library then -caches the DFA which corresponds to the I argument). - -=item B - -If the I flag `B' is specified, the I's legibility -is extended by permitting embedded whitespace and comments to allow one -to write down complex regular expressions more cleary and even in a -documented way. - -=item B - -If the I lines flag `B' is specified, the string I is -treated as multiple lines. That is, this changes the regular expression -meta characters `B<^>' and `B<$>' from matching at only the very start -or end of the string I to the start or end of any line anywhere -within the string I. - -=item B - -If the I line flag `B' is specified, the string I is -treated as single line. That is, this changes the regular expression -meta character `B<.>' to match any character whatsoever, even a newline, -which it normally would not match. - -=back - - -=head1 CONVERSION SPECIFICATION - -In the format string of str_format(3) each conversion specification is -introduced by the character B<%>. After the B<%>, the following appear -in sequence: - -=over 4 - -=item o - -An optional field, consisting of a decimal digit string followed by a B<$>, -specifying the next argument to access. If this field is not provided, the -argument following the last argument accessed will be used. Arguments are -numbered starting at B<1>. If unaccessed arguments in the format string are -interspersed with ones that are accessed the results will be indeterminate. - -=item o - -Zero or more of the following flags: - -A B<#> character specifying that the value should be converted to an -``alternate form''. For B, B, B, B, B

, B, and B, -conversions, this option has no effect. For B conversions, the precision -of the number is increased to force the first character of the output string -to a zero (except if a zero value is printed with an explicit precision of -zero). For B and B conversions, a non-zero result has the string B<0x> -(or B<0X> for B conversions) prepended to it. For B, B, B, B, -and B, conversions, the result will always contain a decimal point, even if -no digits follow it (normally, a decimal point appears in the results of those -conversions only if a digit follows). For B and B conversions, trailing -zeros are not removed from the result as they would otherwise be. - -A zero `B<0>' character specifying zero padding. For all conversions except -B, the converted value is padded on the left with zeros rather than blanks. -If a precision is given with a numeric conversion (B, B, B, B, -B, B, and B), the `B<0>' flag is ignored. - -A negative field width flag `B<->' indicates the converted value is to be left -adjusted on the field boundary. Except for B conversions, the converted -value is padded on the right with blanks, rather than on the left with blanks -or zeros. A `B<->' overrides a `B<0>' if both are given. - -A space, specifying that a blank should be left before a positive number -produced by a signed conversion (B, B, B, B, B, B, or B). - -A `B<+>' character specifying that a sign always be placed before a number -produced by a signed conversion. A `B<+>' overrides a space if both are used. - -=item o - -An optional decimal digit string specifying a minimum field width. -If the converted value has fewer characters than the field width, it will -be padded with spaces on the left (or right, if the left-adjustment -flag has been given) to fill out -the field width. - -=item o - -An optional precision, in the form of a period `B<.>' followed by an -optional digit string. If the digit string is omitted, the precision is -taken as zero. 
This gives the minimum number of digits to appear for -B, B, B, B, B, and B conversions, the number of digits -to appear after the decimal-point for B, B, and B conversions, -the maximum number of significant digits for B and B conversions, -or the maximum number of characters to be printed from a string for B -conversions. - -=item o - -The optional character B, specifying that a following B, B, B, -B, B, or B conversion corresponds to a `C' or `C' argument, or that a following B conversion corresponds to a -pointer to a `C argument. - -=item o - -The optional character B (ell) specifying that a following B, B, -B, B, B, or B conversion applies to a pointer to a `C' -or `C' argument, or that a following B conversion -corresponds to a pointer to a `C argument. - -=item o - -The optional character B, specifying that a following B, B, B, -B, B, or B conversion corresponds to a `C' or `C' argument, or that a following B conversion corresponds to a -pointer to a `C' argument. - -=item o - -The character B specifying that a following B, B, B, B, or B -conversion corresponds to a `C' argument. - -=item o - -A character that specifies the type of conversion to be applied. - -=back - -A field width or precision, or both, may be indicated by an asterisk `B<*>' or -an asterisk followed by one or more decimal digits and a `B<$>' instead of a -digit string. In this case, an `C' argument supplies the field width or -precision. A negative field width is treated as a left adjustment flag -followed by a positive field width; a negative precision is treated as though -it were missing. If a single format directive mixes positional (`B') and -non-positional arguments, the results are undefined. - -The conversion specifiers and their meanings are: - -=over 4 - -=item B - -The `C' (or appropriate variant) argument is converted to signed decimal -(B and B), unsigned octal (B), unsigned decimal (B), or unsigned -hexadecimal (B and B) notation. 
The letters B are used for B -conversions; the letters B are used for B conversions. The -precision, if any, gives the minimum number of digits that must appear; if the -converted value requires fewer digits, it is padded on the left with zeros. - -=item B - -The `C argument is converted to signed decimal, unsigned octal, or -unsigned decimal, as if the format had been B, B, or B -respectively. These conversion characters are deprecated, and will eventually -disappear. - -=item B - -The `C' argument is rounded and converted in the style -`[-]d.dddB+-dd' where there is one digit before the decimal-point character -and the number of digits after it is equal to the precision; if the precision -is missing, it is taken as 6; if the precision is zero, no decimal-point -character appears. An B conversion uses the letter B (rather than B) -to introduce the exponent. The exponent always contains at least two digits; -if the value is zero, the exponent is 00. - -=item B - -The `C' argument is rounded and converted to decimal notation in the -style `[-]ddd.ddd>' where the number of digits after the decimal-point -character is equal to the precision specification. If the precision is -missing, it is taken as 6; if the precision is explicitly zero, no -decimal-point character appears. If a decimal point appears, at least one -digit appears before it. - -=item B - -The `C' argument is converted in style B or B (or B for B -conversions). The precision specifies the number of significant digits. If -the precision is missing, 6 digits are given; if the precision is zero, it is -treated as 1. Style B is used if the exponent from its conversion is less -than -4 or greater than or equal to the precision. Trailing zeros are removed -from the fractional part of the result; a decimal point appears only if it is -followed by at least one digit. - -=item B - -The `C' argument is converted to an `C, and the resulting -character is written. 
- -=item B - -The `C' argument is expected to be a pointer to an array of character -type (pointer to a string). Characters from the array are written up to (but -not including) a terminating C character; if a precision is specified, no -more than the number specified are written. If a precision is given, no null -character need be present; if the precision is not specified, or is greater -than the size of the array, the array must contain a terminating C -character. - -=item B

- -The `C pointer argument is printed in hexadecimal (as if by `B<%#x>' -or `C<%#lx>). - -=item B - -The number of characters written so far is stored into the integer indicated -by the `C' (or variant) pointer argument. No argument is converted. - -=item B<%> - -A `B<%>' is written. No argument is converted. The complete conversion -specification is `B<%%>. - -=back - -In no case does a non-existent or small field width cause truncation of a -field; if the result of a conversion is wider than the field width, the field -is expanded to contain the conversion result. - -=head1 EXAMPLES - -In the following a few snippets of selected use cases of B are -presented: - -=over 4 - -=item B - - char *v1 = "foo bar quux"; - char *v2 = "baz"; - str_splice(v1, 3, 5, v2, 0): - /* now we have v1 = "foobazquux" */ - .... - -=item B - - char *var = " foo \t " bar 'baz'" q'uu'x #comment"; - char *tok, *p; - p = var; - while ((tok = str_token(p, ":", "\"'", "#", 0)) != NULL) { - /* here we enter three times: - 1. tok = "foo" - 2. tok = " bar 'baz'" - 3. tok = "quux" */ - ... - } - -=item B - - char *var = "foo:bar"; - if (str_parse(var, "^.+?:.+$/)) { - /* var matched */ - ... - } - -=item B - - char *var = "foo:bar"; - char *cp, *v1, *v2; - if (str_parse(var, "m/^(.+?):(.+)$/b", &cp, &v1, &v2)) { - ... - /* now we have: - cp = "foo\0bar\0" and v1 and v2 pointing - into it, i.e., v1 = "foo", v2 = "bar" */ - ... - free(cp); - } - -=item B - - char *var = "foo:bar"; - char *subst = "quux"; - char *new; - str_parse(var, "s/^(.+?):(.+)$/$1-%s-$2/", &new, subst); - ... - /* now we have: var = "foo:bar", new = "foo:quux:bar" */ - ... - free(new); - -=item B - - char *v0 = "abc..."; /* length not guessable */ - char *v1 = "foo"; - void *v2 = 0xDEAD; - int v3 = 42; - char *cp; - int n; - - n = str_format(NULL, 0, "%s|%5s-%x-%04d", v0, v1, v2, v3); - cp = malloc(n); - str_format(cp, n, "%s-%x-%04d", v1, v2, v3); - /* now we have cp = "abc...| foo-DEAD-0042" */ - ... 
- free(cp); - -=back - -=head1 SEE ALSO - -string(3), printf(3), perlre(1). - -=head1 HISTORY - -The B library was written in November and December 1999 by Ralf -S. Engelschall. As building blocks various existing code was used and -recycled: for the str_token(3) implementation an anchient strtok(3) -flavor from William Deich 1991 was cleaned up and adjusted. As the -background parsing engine for str_parse(3) a heavily stripped down -version of Philip Hazel's PCRE 2.08 library was used. The str_format(3) -implementation was based on Panos Tsirigotis' sprintf(3) code as -adjusted by the Apache Software Foundation 1998. The formatting engine -was stripped down and enhanced to support internal extensions which were -required by str_format(3) and str_parse(3). - -=head1 AUTHOR - - Ralf S. Engelschall - rse@engelschall.com - www.engelschall.com - =cut