Index: ossp-pkg/str/ChangeLog RCS File: /v/ossp/cvs/ossp-pkg/str/ChangeLog,v rcsdiff -q -kk '-r1.32' '-r1.33' -u '/v/ossp/cvs/ossp-pkg/str/ChangeLog,v' 2>/dev/null --- ChangeLog 2001/08/16 12:21:21 1.32 +++ ChangeLog 2001/08/16 13:17:00 1.33 @@ -10,6 +10,11 @@ ChangeLog Changes between 0.9.4 and 0.9.5 (14-Jul-2000 to 16-Aug-2001): + + *) Fix return code documentation of str_parse(): it -1 (error), 0 + (no matching) or +1 (matching) and not just TRUE or FALSE. + Additionally fixed the str_parse() examples in the documentation. + [Ralf S. Engelschall] *) Let str_base64(STR_BASE64_DECODE, ...) correctly honor the specified maximum size of the input string. Index: ossp-pkg/str/str.3 RCS File: /v/ossp/cvs/ossp-pkg/str/Attic/str.3,v rcsdiff -q -kk '-r1.33' '-r1.34' -u '/v/ossp/cvs/ossp-pkg/str/Attic/str.3,v' 2>/dev/null --- str.3 2001/04/27 12:22:21 1.33 +++ str.3 2001/08/16 13:17:00 1.34 @@ -1,5 +1,5 @@ -.\" Automatically generated by Pod::Man version 1.02 -.\" Sun Dec 31 12:23:40 2000 +.\" Automatically generated by Pod::Man version 1.15 +.\" Thu Aug 16 15:15:59 2001 .\" .\" Standard preamble: .\" ====================================================================== @@ -46,8 +46,8 @@ . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch . ds L" "" . ds R" "" -. ds C` ` -. ds C' ' +. ds C` "" +. ds C' "" 'br\} .el\{\ . ds -- \|\(em\| @@ -63,7 +63,7 @@ .if \nF \{\ . de IX . tm Index:\\$1\t\\n%\t"\\$2" -. . +.. . nr % 0 . rr F .\} @@ -283,8 +283,10 @@ .Ip "int \fBstr_parse\fR(const char *\fIs\fR, const char *\fIpop\fR, ...);" 4 .IX Item "int str_parse(const char *s, const char *pop, ...);" This parses the string \fIs\fR according to the parsing operation specified -by \fIpop\fR. If the parsing operation succeeds, \f(CW\*(C`TRUE\*(C'\fR is returned. Else -\&\f(CW\*(C`FALSE\*(C'\fR is returned. +by \fIpop\fR. If the parsing operation succeeds, \f(CW\*(C`1\*(C'\fR is returned. 
If the +parsing operation failed because the pattern \fIpop\fR did not match, \f(CW\*(C`0\*(C'\fR +is returned. If the parsing operation failed because the underlying +regular expression library failed, \f(CW\*(C`\-1\*(C'\fR is returned. .Sp The \fIpop\fR string usually has one of the following two syntax variants: `\fBm\fR \fIdelim\fR \fIregex\fR \fIdelim\fR \fIflags\fR*' (for matching operations) @@ -699,7 +701,7 @@ .IX Item "Match a String" .Vb 5 \& char *var = "foo:bar"; -\& if (str_parse(var, "^.+?:.+$/)) { +\& if (str_parse(var, "^.+?:.+$/) > 0) { \& /* var matched */ \& ... \& } @@ -709,7 +711,7 @@ .Vb 10 \& char *var = "foo:bar"; \& char *cp, *v1, *v2; -\& if (str_parse(var, "m/^(.+?):(.+)$/b", &cp, &v1, &v2)) { +\& if (str_parse(var, "m/^(.+?):(.+)$/b", &cp, &v1, &v2) > 0) { \& ... \& /* now we have: \& cp = "foo\e0bar\e0" and v1 and v2 pointing @@ -758,11 +760,12 @@ recycled: for the \fIstr_token\fR\|(3) implementation an anchient \fIstrtok\fR\|(3) flavor from William Deich 1991 was cleaned up and adjusted. As the background parsing engine for \fIstr_parse\fR\|(3) a heavily stripped down -version of Philip Hazel's \s-1PCRE\s0 2.08 library was used. The \fIstr_format\fR\|(3) +version of Philip Hazel's Perl Compatible Regular Expression (\s-1PCRE\s0) +library (initially version 2.08 and now 3.5) was used. The \fIstr_format\fR\|(3) implementation was based on Panos Tsirigotis' \fIsprintf\fR\|(3) code as -adjusted by the Apache Software Foundation 1998. The formatting engine -was stripped down and enhanced to support internal extensions which were -required by \fIstr_format\fR\|(3) and \fIstr_parse\fR\|(3). +adjusted by the Apache Software Foundation (\s-1ASF\s0) 1998. The formatting +engine was stripped down and enhanced to support internal extensions +which were required by \fIstr_format\fR\|(3) and \fIstr_parse\fR\|(3). 
.SH "AUTHOR" .IX Header "AUTHOR" .Vb 3 Index: ossp-pkg/str/str.pod RCS File: /v/ossp/cvs/ossp-pkg/str/str.pod,v co -q -kk -p'1.27' '/v/ossp/cvs/ossp-pkg/str/str.pod,v' | diff -u /dev/null - -L'ossp-pkg/str/str.pod' 2>/dev/null --- ossp-pkg/str/str.pod +++ - 2024-05-17 12:14:46.769755275 +0200 @@ -0,0 +1,774 @@ +## +## Str - String Library +## Copyright (c) 1999-2000 Ralf S. Engelschall +## +## This file is part of Str, a string handling and manipulation +## library which can be found at http://www.engelschall.com/sw/str/. +## +## Permission to use, copy, modify, and distribute this software for +## any purpose with or without fee is hereby granted, provided that +## the above copyright notice and this permission notice appear in all +## copies. +## +## THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED +## WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +## MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +## IN NO EVENT SHALL THE AUTHORS AND COPYRIGHT HOLDERS AND THEIR +## CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +## SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +## LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +## USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +## ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +## OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT +## OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +## SUCH DAMAGE. +## +## str.pod: Unix manual page +## + +# Parts of this manual page (the str_format description) is: +# +# Copyright (c) 1990, 1991, 1993 +# The Regents of the University of California. All rights reserved. +# +# This code is derived from software contributed to Berkeley by +# Chris Torek and the American National Standards Committee X3, +# on Information Processing Systems. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# 3. All advertising materials mentioning features or use of this software +# must display the following acknowledgement: +# This product includes software developed by the University of +# California, Berkeley and its contributors. +# 4. Neither the name of the University nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. + +=pod + +=head1 NAME + +B - String Library + +=head1 VERSION + +Str STR_VERSION_STR + +=head1 SYNOPSIS + +B, +B, +B, +B, +B, +B, +B, +B, +B, +B, +B, +B, +B. 
+ +=head1 DESCRIPTION + +The B<Str> library is a generic string library written in ANSI C which +provides functions for handling, matching, parsing, searching and +formatting of C strings. So it can be considered as a superset of POSIX +string(3), but its main intention is to provide a more convenient and +compact API plus a more generalized functionality. + +=head1 FUNCTIONS + +The following functions are provided by the B<Str> API: + +=over 4 + +=item str_size_t B(const char *I); + +This function determines the length of string I, i.e., the number +of characters starting at I that precede the terminating C +character. It returns C if I is C. + +=item char *B(char *I, const char *I, size_t I); + +This copies the characters in string I into the string I, but never more +than I characters (if I is greater than C<0>). The two involved strings +can overlap and the characters in I are always C-terminated. The +string I has to be large enough to hold all characters to be copied. +The function returns C if I or I are C. Else it returns the +pointer to the written C-terminating character in I. + +=item char *B(const char *I, str_size_t I); + +This returns a copy of the characters in string I, but never more than I +characters if I is greater than C<0>. It returns C if I is +C. The returned string has to be deallocated later with free(3). + +=item char *B(char *I, ...); + +This function concatenates the characters of all string arguments into a newly +allocated string and returns this new string. If I is C the function +returns C. Else it returns the pointer to the written final +C-terminating character in I. The returned string later has to be +deallocated with free(3). + +=item char *B(char *I, str_size_t I, str_size_t I, char *I, str_size_t I); + +This splices the string I into string I, i.e., the I characters +at offset I in I are removed and at their location the string +I is inserted (or just the first I characters of I if I is +greater than C<0>). It returns C if I or I are C. 
+Else the string I is returned. The function also supports the +situation where I is a sub-string of I as long as the area +I...I and I...I do not overlap. The caller +always has to make sure that enough room exists in I. + +=item int B(const char *I, const char *I, str_size_t I, int I); + +This performs a lexicographical comparison of the two strings I +and I (but never compares more than I characters of them) +and returns one of three return values: a value lower than C<0> if +I is lexicographically lower than I, a value of exactly C<0> +if I and I are equal and a value greater than C<0> if I is +lexicographically higher than I. Per default (I is C<0>) the +comparison is case-sensitive, but if C is used for I +the comparison is done in a case-insensitive way. + +=item char *B(const char *I, size_t I, const char *I, int I); + +This function spans a string I according to the characters specified in +I. If I is C<0>, this means that I is spanned from left to +right starting at I (and ending either when reaching the terminating C +character or already after I spanned characters) as long as the characters +of I are contained in I. + +Alternatively one can use a I of C to indicate that I +is spanned as long as the characters of I are I<not> contained in +I, i.e., I then specifies the complement of the spanning +characters. + +In both cases one can additionally "or" (with the C operator ``C<|>'') +C into I to indicate that the spanning is done right to +left starting at the terminating C character of I (and ending +either when reaching I or already after I spanned characters). + +=item char *B(const char *I, str_size_t I, const char *I); + +This function searches for the (smaller) string I inside (larger) string +I. If I is not C<0>, the search is performed only inside the first I +characters of I. 
+ +=item char *B(char **I, const char *I, const char *I, const char *I, int I); + +This function considers the string I to consist of a sequence of +zero or more text tokens separated by spans of one or more characters +from the separator string I. However, text between matched pairs +of quotemarks (characters in I) is treated as plain text, never +as delimiter (separator) text. Each call of this function returns a +pointer to the first character of the first token of I. The token is +C-terminated, i.e., the string I is processed in a destructive +way. If there are quotation marks or escape sequences, the input +string is rewritten with quoted sections and escape sequences properly +interpreted. + +This function keeps track of its parsing position in the string between +separate calls by simply adjusting the caller's I pointer, so that +subsequent calls with the same pointer variable I will start +processing from the position immediately after the last returned token. +In this way subsequent calls will work through the string I until no +tokens remain. When no token remains in I, C is returned. The +string of token separators (I) and the string of quote characters +(I) may be changed from call to call. + +If a character in the string I is not quoted or escaped, and is in the +I set, then it is overwritten with a C character and the rest of +the string is ignored. The characters to be used as quote characters are +specified in the I set, and must be used in balanced pairs. If there +is more than one flavor of quote character, one kind of quote character may be +used to quote another kind. If an unbalanced quote is found, the function +silently acts as if one had been placed at the end of the input string. The +I and I strings must be disjoint, i.e., they have to share +no characters. 
+ +The I argument can be used to modify the processing of the string +(default for I is C<0>): C forces I +characters to be stripped from quoted tokens; C +enables the interpretation (and expansion) of backslash escape sequences +(`B<\x>') through ANSI-C rules; C forces that after the +terminating C is written and the token returned, further delimiters +are skipped (this allows one to make sure that the delimiters for +one word don't become part of the next word if one changes delimiters +between calls); and C enables the recognition and +expansion of ANSI C Trigraph sequences (as a side effect this enables +C, too). + +=item int B<str_parse>(const char *I<s>, const char *I<pop>, ...); + +This parses the string I<s> according to the parsing operation specified +by I<pop>. If the parsing operation succeeds, C<1> is returned. If the +parsing operation failed because the pattern I<pop> did not match, C<0> +is returned. If the parsing operation failed because the underlying +regular expression library failed, C<-1> is returned. + +The I<pop> string usually has one of the following two syntax variants: +`B<m> I<delim> I<regex> I<delim> I<flags>*' (for matching operations) +and `B<s> I<delim> I<regex> I<delim> I<subst> I<delim> I<flags>*' (for +substitution operations). For more details about the syntax variants +and semantics of the I<pop> argument see section B<Parsing Specification> below. The syntax of the I<regex> part in I<pop> is +mostly equivalent to Perl 5's regular expression syntax. For the +complete and gory details see perlre(1). You can find a brief summary +under section B<Perl Regular Expressions> below. + +=item int B(char *I, str_size_t I, const char *I, ...); + +This formats a new string according to I and optionally following +arguments and writes it into the string I, but never more than I +characters at all. It returns the number of written characters. If I is +C it just calculates the number of characters which would be written. 
+ +The function generates the output string under the control of the I +format string that specifies how subsequent arguments (or arguments accessed +via the variable-length argument facilities of stdarg(3)) are converted for +output. + +The format string I is composed of zero or more directives: +ordinary characters (not B<%>), which are copied unchanged to the output +stream; and conversion specifications, each of which results in fetching +zero or more subsequent arguments. Each conversion specification is +introduced by the character B<%>. The arguments must correspond properly +(after type promotion) with the conversion specifier. Which conversion +specifications are supported is described in detail under B<CONVERSION SPECIFICATION> below. + +=item unsigned long B(const char *I, str_size_t I, int I); + +This function calculates a hash value of string I (or of its first I +characters if I is not equal to C<0>). The following hashing functions +are supported and can be selected with I: STR_HASH_DJBX33 (Daniel +J. Bernstein, Times 33 Hash with Addition), STR_HASH_BJDDJ (Bob +Jenkins, Dr. Dobb's Journal), and STR_HASH_MACRC32 (Mark Adler, Cyclic +Redundancy Check with 32-Bit). This function is intended for fast use +in hashing algorithms and I<not> for use as cryptographically strong +message digests. + +=item int B(char *I, str_size_t I, unsigned char *I, str_size_t I, int I); + +This function Base64 encodes I bytes starting at I and writes +the resulting string into I (but never more than I characters are +written). The I for this operation has to be C. +Additionally one can OR the value C to enable strict +encoding where after every 72nd output character a newline character is +inserted. The function returns the number of output characters written. +If I is C the function just calculates the number of required +output characters. + +Alternatively, if I is C the string I (or +the first I characters only if I is not C<0>) is decoded and the +output bytes written at I. 
Again, if I is C only the +number of required output bytes is calculated. + +=back + +=head1 GORY DETAILS + +In this part of the documentation more complex topics are documented in +detail. + +=head2 Perl Regular Expressions + +The regular expressions used in B<Str> are more or less Perl compatible +(they are provided by a stripped down and built-in version of the +I<PCRE> library). So the syntax description in perlre(1) applies +and does not have to be repeated here. For a deeper understanding +and details you should have a look at the book `I<Mastering Regular Expressions>' (see also the perlbook(1) manpage) by I<Jeffrey Friedl>. +For convenience reasons we give you only a brief summary of Perl +compatible regular expressions: + +The following metacharacters have their standard egrep(1) meanings: + + \ Quote the next metacharacter + ^ Match the beginning of the line + . Match any character (except newline) + $ Match the end of the line (or before newline at the end) + | Alternation + () Grouping + [] Character class + +The following standard quantifiers are recognized: + + * Match 0 or more times (greedy) + *? Match 0 or more times (non greedy) + + Match 1 or more times (greedy) + +? Match 1 or more times (non greedy) + ? Match 1 or 0 times (greedy) + ?? Match 1 or 0 times (non greedy) + {n} Match exactly n times (greedy) + {n}? Match exactly n times (non greedy) + {n,} Match at least n times (greedy) + {n,}? Match at least n times (non greedy) + {n,m} Match at least n but not more than m times (greedy) + {n,m}? 
Match at least n but not more than m times (non greedy) + +The following backslash sequences are recognized: + + \t Tab (HT, TAB) + \n Newline (LF, NL) + \r Return (CR) + \f Form feed (FF) + \a Alarm (bell) (BEL) + \e Escape (think troff) (ESC) + \033 Octal char + \x1B Hex char + \c[ Control char + \l Lowercase next char + \u Uppercase next char + \L Lowercase till \E + \U Uppercase till \E + \E End case modification + \Q Quote (disable) pattern metacharacters till \E + +The following non zero-width assertions are recognized: + + \w Match a "word" character (alphanumeric plus "_") + \W Match a non-word character + \s Match a whitespace character + \S Match a non-whitespace character + \d Match a digit character + \D Match a non-digit character + +The following zero-width assertions are recognized: + + \b Match a word boundary + \B Match a non-(word boundary) + \A Match only at beginning of string + \Z Match only at end of string, or before newline at the end + \z Match only at end of string + \G Match only where previous m//g left off (works only with /g) + +The following regular expression extensions are recognized: + + (?#text) An embedded comment + (?:pattern) This is for clustering, not capturing (simple) + (?imsx-imsx:pattern) This is for clustering, not capturing (full) + (?=pattern) A zero-width positive lookahead assertion + (?!pattern) A zero-width negative lookahead assertion + (?<=pattern) A zero-width positive lookbehind assertion + (?<!pattern) A zero-width negative lookbehind assertion + (?>pattern) An "independent" subexpression + (?(cond)yes-re) Conditional expression (simple) + (?(cond)yes-re|no-re) Conditional expression (full) + (?imsx-imsx) One or more embedded pattern-match modifiers + +=head2 Parsing Specification + +The B<str_parse>(const char *I<s>, const char *I<pop>, ...) function +is a very flexible but complex one. The argument I<s> is the string on +which the parsing operation specified by argument I<pop> is applied. 
+The parsing semantics are highly influenced by Perl's `B<=~>' matching +operator, because one of the main goals of str_parse(3) is to allow one +to rewrite typical Perl matching constructs into C. + +Now to the gory details. In general, the I argument of str_parse(3) +has one of the following two syntax variants: + +=over 4 + +=item B `B I I I I*': + +This matches I against the Perl-style regular expression I +under the control of zero or more I which control the parsing +semantics. The stripped down I syntax `I' is equivalent to +`BIB'. + +For each grouping pair of parentheses in I, the text in I +which was grouped by the parentheses is extracted into new strings. +These per default are allocated as separate strings and returned to the +caller through following `B' arguments. The caller is required +to free(3) them later. + +=item B `B I I I I I I*': + +This matches I against the Perl-style regular expression I +under the control of zero or more I which control the parsing +semantics. As a result of the operation, a new string is formed which +consists of I but with the part which matched I replaced by +I. The result string is returned to the caller through a `B' argument. The caller is required to free(3) this later. + +For each grouping pair of parentheses in I, the text in I +which was grouped by the parentheses is extracted into new strings +and can be referenced for expansion via `B<$n>' (n=1,..) in I. +Additionally any str_format(3) style `B<%>' constructs in I are +expanded through additional caller supplied arguments. + +=back + +The following I are supported: + +=over 4 + +=item B + +If the I flag `B' is specified, the extracted strings are +bundled together into a single chunk of memory and its address is +returned to the caller with an additional `B' argument which has +to precede the regular string arguments. The caller then has to free(3) +only this chunk of memory in order to free all extracted strings at +once. 
+ +=item B + +If the case-I flag `B' is specified, I +is matched in a case-insensitive way. + +=item B + +If the I flag `B' is specified, this indicates to the B<Str> +library that the whole I string is constant and that its internal +pre-processing (it is compiled into a deterministic finite automaton +(DFA) internally) has to be done only once (the B<Str> library then +caches the DFA which corresponds to the I argument). + +=item B + +If the I flag `B' is specified, the I's legibility +is extended by permitting embedded whitespace and comments to allow one +to write down complex regular expressions more clearly and even in a +documented way. + +=item B + +If the I lines flag `B' is specified, the string I is +treated as multiple lines. That is, this changes the regular expression +meta characters `B<^>' and `B<$>' from matching at only the very start +or end of the string I to the start or end of any line anywhere +within the string I. + +=item B + +If the I line flag `B' is specified, the string I is +treated as a single line. That is, this changes the regular expression +meta character `B<.>' to match any character whatsoever, even a newline, +which it normally would not match. + +=back + + +=head1 CONVERSION SPECIFICATION + +In the format string of str_format(3) each conversion specification is +introduced by the character B<%>. After the B<%>, the following appear +in sequence: + +=over 4 + +=item o + +An optional field, consisting of a decimal digit string followed by a B<$>, +specifying the next argument to access. If this field is not provided, the +argument following the last argument accessed will be used. Arguments are +numbered starting at B<1>. If unaccessed arguments in the format string are +interspersed with ones that are accessed the results will be indeterminate. + +=item o + +Zero or more of the following flags: + +A B<#> character specifying that the value should be converted to an +``alternate form''. For B, B, B, B, B

, B, and B, +conversions, this option has no effect. For B conversions, the precision +of the number is increased to force the first character of the output string +to a zero (except if a zero value is printed with an explicit precision of +zero). For B and B conversions, a non-zero result has the string B<0x> +(or B<0X> for B conversions) prepended to it. For B, B, B, B, +and B, conversions, the result will always contain a decimal point, even if +no digits follow it (normally, a decimal point appears in the results of those +conversions only if a digit follows). For B and B conversions, trailing +zeros are not removed from the result as they would otherwise be. + +A zero `B<0>' character specifying zero padding. For all conversions except +B, the converted value is padded on the left with zeros rather than blanks. +If a precision is given with a numeric conversion (B, B, B, B, +B, B, and B), the `B<0>' flag is ignored. + +A negative field width flag `B<->' indicates the converted value is to be left +adjusted on the field boundary. Except for B conversions, the converted +value is padded on the right with blanks, rather than on the left with blanks +or zeros. A `B<->' overrides a `B<0>' if both are given. + +A space, specifying that a blank should be left before a positive number +produced by a signed conversion (B, B, B, B, B, B, or B). + +A `B<+>' character specifying that a sign always be placed before a number +produced by a signed conversion. A `B<+>' overrides a space if both are used. + +=item o + +An optional decimal digit string specifying a minimum field width. +If the converted value has fewer characters than the field width, it will +be padded with spaces on the left (or right, if the left-adjustment +flag has been given) to fill out +the field width. + +=item o + +An optional precision, in the form of a period `B<.>' followed by an +optional digit string. If the digit string is omitted, the precision is +taken as zero. 
This gives the minimum number of digits to appear for +B, B, B, B, B, and B conversions, the number of digits +to appear after the decimal-point for B, B, and B conversions, +the maximum number of significant digits for B and B conversions, +or the maximum number of characters to be printed from a string for B +conversions. + +=item o + +The optional character B, specifying that a following B, B, B, +B, B, or B conversion corresponds to a `C' or `C' argument, or that a following B conversion corresponds to a +pointer to a `C argument. + +=item o + +The optional character B (ell) specifying that a following B, B, +B, B, B, or B conversion applies to a pointer to a `C' +or `C' argument, or that a following B conversion +corresponds to a pointer to a `C argument. + +=item o + +The optional character B, specifying that a following B, B, B, +B, B, or B conversion corresponds to a `C' or `C' argument, or that a following B conversion corresponds to a +pointer to a `C' argument. + +=item o + +The character B specifying that a following B, B, B, B, or B +conversion corresponds to a `C' argument. + +=item o + +A character that specifies the type of conversion to be applied. + +=back + +A field width or precision, or both, may be indicated by an asterisk `B<*>' or +an asterisk followed by one or more decimal digits and a `B<$>' instead of a +digit string. In this case, an `C' argument supplies the field width or +precision. A negative field width is treated as a left adjustment flag +followed by a positive field width; a negative precision is treated as though +it were missing. If a single format directive mixes positional (`B') and +non-positional arguments, the results are undefined. + +The conversion specifiers and their meanings are: + +=over 4 + +=item B + +The `C' (or appropriate variant) argument is converted to signed decimal +(B and B), unsigned octal (B), unsigned decimal (B), or unsigned +hexadecimal (B and B) notation. 
The letters B are used for B +conversions; the letters B are used for B conversions. The +precision, if any, gives the minimum number of digits that must appear; if the +converted value requires fewer digits, it is padded on the left with zeros. + +=item B + +The `C argument is converted to signed decimal, unsigned octal, or +unsigned decimal, as if the format had been B, B, or B +respectively. These conversion characters are deprecated, and will eventually +disappear. + +=item B + +The `C' argument is rounded and converted in the style +`[-]d.dddB+-dd' where there is one digit before the decimal-point character +and the number of digits after it is equal to the precision; if the precision +is missing, it is taken as 6; if the precision is zero, no decimal-point +character appears. An B conversion uses the letter B (rather than B) +to introduce the exponent. The exponent always contains at least two digits; +if the value is zero, the exponent is 00. + +=item B + +The `C' argument is rounded and converted to decimal notation in the +style `[-]ddd.ddd>' where the number of digits after the decimal-point +character is equal to the precision specification. If the precision is +missing, it is taken as 6; if the precision is explicitly zero, no +decimal-point character appears. If a decimal point appears, at least one +digit appears before it. + +=item B + +The `C' argument is converted in style B or B (or B for B +conversions). The precision specifies the number of significant digits. If +the precision is missing, 6 digits are given; if the precision is zero, it is +treated as 1. Style B is used if the exponent from its conversion is less +than -4 or greater than or equal to the precision. Trailing zeros are removed +from the fractional part of the result; a decimal point appears only if it is +followed by at least one digit. + +=item B + +The `C' argument is converted to an `C, and the resulting +character is written. 
+ +=item B + +The `C' argument is expected to be a pointer to an array of character +type (pointer to a string). Characters from the array are written up to (but +not including) a terminating C character; if a precision is specified, no +more than the number specified are written. If a precision is given, no null +character need be present; if the precision is not specified, or is greater +than the size of the array, the array must contain a terminating C +character. + +=item B

+ +The `C<void *>' pointer argument is printed in hexadecimal (as if by `B<%#x>' +or `C<%#lx>'). + +=item B + +The number of characters written so far is stored into the integer indicated +by the `C' (or variant) pointer argument. No argument is converted. + +=item B<%> + +A `B<%>' is written. No argument is converted. The complete conversion +specification is `B<%%>'. + +=back + +In no case does a non-existent or small field width cause truncation of a +field; if the result of a conversion is wider than the field width, the field +is expanded to contain the conversion result. + +=head1 EXAMPLES + +In the following a few snippets of selected use cases of B<Str> are +presented: + +=over 4 + +=item B + + char *v1 = "foo bar quux"; + char *v2 = "baz"; + str_splice(v1, 3, 5, v2, 0); + /* now we have v1 = "foobazquux" */ + .... + +=item B + + char *var = " foo \t " bar 'baz'" q'uu'x #comment"; + char *tok, *p; + p = var; + while ((tok = str_token(p, ":", "\"'", "#", 0)) != NULL) { + /* here we enter three times: + 1. tok = "foo" + 2. tok = " bar 'baz'" + 3. tok = "quux" */ + ... + } + +=item B + + char *var = "foo:bar"; + if (str_parse(var, "m/^.+?:.+$/") > 0) { + /* var matched */ + ... + } + +=item B + + char *var = "foo:bar"; + char *cp, *v1, *v2; + if (str_parse(var, "m/^(.+?):(.+)$/b", &cp, &v1, &v2) > 0) { + ... + /* now we have: + cp = "foo\0bar\0" and v1 and v2 pointing + into it, i.e., v1 = "foo", v2 = "bar" */ + ... + free(cp); + } + +=item B + + char *var = "foo:bar"; + char *subst = "quux"; + char *new; + str_parse(var, "s/^(.+?):(.+)$/$1-%s-$2/", &new, subst); + ... + /* now we have: var = "foo:bar", new = "foo-quux-bar" */ + ... + free(new); + +=item B + + char *v0 = "abc..."; /* length not guessable */ + char *v1 = "foo"; + void *v2 = 0xDEAD; + int v3 = 42; + char *cp; + int n; + + n = str_format(NULL, 0, "%s|%5s-%x-%04d", v0, v1, v2, v3); + cp = malloc(n); + str_format(cp, n, "%s|%5s-%x-%04d", v0, v1, v2, v3); + /* now we have cp = "abc...| foo-DEAD-0042" */ + ... 
+ free(cp); + +=back + +=head1 SEE ALSO + +string(3), printf(3), perlre(1). + +=head1 HISTORY + +The B<Str> library was written in November and December 1999 by Ralf +S. Engelschall. As building blocks various existing code was used and +recycled: for the str_token(3) implementation an ancient strtok(3) +flavor from William Deich 1991 was cleaned up and adjusted. As the +background parsing engine for str_parse(3) a heavily stripped down +version of Philip Hazel's Perl Compatible Regular Expression (PCRE) +library (initially version 2.08 and now 3.5) was used. The str_format(3) +implementation was based on Panos Tsirigotis' sprintf(3) code as +adjusted by the Apache Software Foundation (ASF) 1998. The formatting +engine was stripped down and enhanced to support internal extensions +which were required by str_format(3) and str_parse(3). + +=head1 AUTHOR + + Ralf S. Engelschall + rse@engelschall.com + www.engelschall.com + +=cut +