.\" Automatically generated by Pod::Man version 1.02 .\" Tue May 8 21:21:11 2001 .\" .\" Standard preamble: .\" ====================================================================== .de Sh \" Subsection heading .br .if t .Sp .ne 5 .PP \fB\\$1\fR .PP .. .de Sp \" Vertical space (when we can't use .PP) .if t .sp .5v .if n .sp .. .de Ip \" List item .br .ie \\n(.$>=3 .ne \\$3 .el .ne 3 .IP "\\$1" \\$2 .. .de Vb \" Begin verbatim text .ft CW .nf .ne \\$1 .. .de Ve \" End verbatim text .ft R .fi .. .\" Set up some character translations and predefined strings. \*(-- will .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left .\" double quote, and \*(R" will give a right double quote. | will give a .\" real vertical bar. \*(C+ will give a nicer C++. Capital omega is used .\" to do unbreakable dashes and therefore won't be available. \*(C` and .\" \*(C' expand to `' in nroff, nothing in troff, for use with C<> .tr \(*W-|\(bv\*(Tr .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' .ie n \{\ . ds -- \(*W- . ds PI pi . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch . ds L" "" . ds R" "" . ds C` ` . ds C' ' 'br\} .el\{\ . ds -- \|\(em\| . ds PI \(*p . ds L" `` . ds R" '' 'br\} .\" .\" If the F register is turned on, we'll generate index entries on stderr .\" for titles (.TH), headers (.SH), subsections (.Sh), items (.Ip), and .\" index entries marked with X<> in POD. Of course, you'll have to process .\" the output yourself in some meaningful fashion. .if \nF \{\ . de IX . tm Index:\\$1\t\\n%\t"\\$2" . . . nr % 0 . rr F .\} .\" .\" For nroff, turn off justification. Always turn off hyphenation; it .\" makes way too many mistakes in technical documents. .hy 0 .if n .na .\" .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). .\" Fear. Run. Save yourself. No user-serviceable parts. .bd B 3 . 
\" fudge factors for nroff and troff .if n \{\ . ds #H 0 . ds #V .8m . ds #F .3m . ds #[ \f1 . ds #] \fP .\} .if t \{\ . ds #H ((1u-(\\\\n(.fu%2u))*.13m) . ds #V .6m . ds #F 0 . ds #[ \& . ds #] \& .\} . \" simple accents for nroff and troff .if n \{\ . ds ' \& . ds ` \& . ds ^ \& . ds , \& . ds ~ ~ . ds / .\} .if t \{\ . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' .\} . \" troff and (daisy-wheel) nroff accents .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' .ds 8 \h'\*(#H'\(*b\h'-\*(#H' .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] .ds ae a\h'-(\w'a'u*4/10)'e .ds Ae A\h'-(\w'A'u*4/10)'E . \" corrections for vroff .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' . \" for low resolution devices (crt and lpr) .if \n(.H>23 .if \n(.V>19 \ \{\ . ds : e . ds 8 ss . ds o a . ds d- d\h'-1'\(ga . ds D- D\h'-1'\(hy . ds th \o'bp' . ds Th \o'LP' . ds ae ae . 
ds Ae AE .\} .rm #[ #] #H #V #F C .\" ====================================================================== .\" .IX Title "l2 3" .TH l2 3 "08-May-2001" "L2 0.1.0" "Logging Library" .UC .SH "NAME" \&\fBStr\fR \- String Library .SH "VERSION" .IX Header "VERSION" Str \s-10.1.0 (08-May-2001)\s0 .SH "SYNOPSIS" .IX Header "SYNOPSIS" \&\fBstr_len\fR, \&\fBstr_copy\fR, \&\fBstr_dup\fR, \&\fBstr_concat\fR, \&\fBstr_splice\fR, \&\fBstr_compare\fR, \&\fBstr_span\fR, \&\fBstr_locate\fR, \&\fBstr_token\fR, \&\fBstr_parse\fR, \&\fBstr_format\fR, \&\fBstr_hash\fR, \&\fBstr_base64\fR. .SH "DESCRIPTION" .IX Header "DESCRIPTION" The \fBStr\fR library is a generic string library written in \s-1ANSI\s0 C which provides functions for handling, matching, parsing, searching and formatting of C strings. So it can be considered as a superset of \s-1POSIX\s0 \&\fIstring\fR\|(3), but its main intention is to provide a more convenient and compact \s-1API\s0 plus more generalized functionality. .SH "FUNCTIONS" .IX Header "FUNCTIONS" The following functions are provided by the \fBStr\fR \s-1API:\s0 .Ip "str_size_t \fBstr_len\fR(const char *\fIs\fR);" 4 .IX Item "str_size_t str_len(const char *s);" This function determines the length of string \fIs\fR, i.e., the number of characters starting at \fIs\fR that precede the terminating \f(CW\*(C`NUL\*(C'\fR character. It returns \f(CW\*(C`NULL\*(C'\fR if \fIs\fR is \f(CW\*(C`NULL\*(C'\fR. .Ip "char *\fBstr_copy\fR(char *\fIs\fR, const char *\fIt\fR, size_t \fIn\fR);" 4 .IX Item "char *str_copy(char *s, const char *t, size_t n);" This copies the characters in string \fIt\fR into the string \fIs\fR, but never more than \fIn\fR characters (if \fIn\fR is greater than \f(CW\*(C`0\*(C'\fR). The two involved strings can overlap and the characters in \fIs\fR are always \f(CW\*(C`NUL\*(C'\fR\-terminated. The string \fIs\fR has to be large enough to hold all characters to be copied. 
The function returns \f(CW\*(C`NULL\*(C'\fR if \fIs\fR or \fIt\fR are \f(CW\*(C`NULL\*(C'\fR. Else it returns the pointer to the written \f(CW\*(C`NUL\*(C'\fR\-terminating character in \fIs\fR. .Ip "char *\fBstr_dup\fR(const char *\fIs\fR, str_size_t \fIn\fR);" 4 .IX Item "char *str_dup(const char *s, str_size_t n);" This returns a copy of the characters in string \fIs\fR, but never more than \fIn\fR characters if \fIn\fR is greater than \f(CW\*(C`0\*(C'\fR. It returns \f(CW\*(C`NULL\*(C'\fR if \fIs\fR is \&\f(CW\*(C`NULL\*(C'\fR. The returned string has to be deallocated later with \fIfree\fR\|(3). .Ip "char *\fBstr_concat\fR(char *\fIs\fR, ...);" 4 .IX Item "char *str_concat(char *s, ...);" This function concatenates the characters of all string arguments into a newly allocated string and returns this new string. If \fIs\fR is \f(CW\*(C`NULL\*(C'\fR the function returns \f(CW\*(C`NULL\*(C'\fR. Else it returns the pointer to the written final \&\f(CW\*(C`NUL\*(C'\fR\-terminating character in \fIs\fR. The returned string later has to be deallocated with \fIfree\fR\|(3). .Ip "char *\fBstr_splice\fR(char *\fIs\fR, str_size_t \fIoff\fR, str_size_t \fIn\fR, char *\fIt\fR, str_size_t \fIm\fR);" 4 .IX Item "char *str_splice(char *s, str_size_t off, str_size_t n, char *t, str_size_t m);" This splices the string \fIt\fR into string \fIs\fR, i.e., the \fIn\fR characters at offset \fIoff\fR in \fIs\fR are removed and at their location the string \&\fIt\fR is inserted (or just the first \fIm\fR characters of \fIt\fR if \fIm\fR is greater than \f(CW\*(C`0\*(C'\fR). It returns \f(CW\*(C`NULL\*(C'\fR if \fIs\fR or \fIt\fR are \f(CW\*(C`NULL\*(C'\fR. Else the string \fIs\fR is returned. The function also supports the situation where \fIt\fR is a sub-string of \fIs\fR as long as the area \&\fIs+off\fR...\fIs+off+n\fR and \fIt\fR...\fIt+m\fR do not overlap. The caller always has to make sure that enough room exists in \fIs\fR. 
.Ip "int \fBstr_compare\fR(const char *\fIs\fR, const char *\fIt\fR, str_size_t \fIn\fR, int \fImode\fR);" 4 .IX Item "int str_compare(const char *s, const char *t, str_size_t n, int mode);" This performs a lexicographical comparison of the two strings \fIs\fR and \fIt\fR (but never compares more than \fIn\fR characters of them) and returns one of three return values: a value lower than \f(CW\*(C`0\*(C'\fR if \&\fIs\fR is lexicographically lower than \fIt\fR, a value of exactly \f(CW\*(C`0\*(C'\fR if \fIs\fR and \fIt\fR are equal and a value greater than \f(CW\*(C`0\*(C'\fR if \fIs\fR is lexicographically higher than \fIt\fR. Per default (\fImode\fR is \f(CW\*(C`0\*(C'\fR) the comparison is case-sensitive, but if \f(CW\*(C`STR_NOCASE\*(C'\fR is used for \fImode\fR the comparison is done in a case-insensitive way. .Ip "char *\fBstr_span\fR(const char *\fIs\fR, size_t \fIn\fR, const char *\fIcharset\fR, int \fImode\fR);" 4 .IX Item "char *str_span(const char *s, size_t n, const char *charset, int mode);" This function spans a string \fIs\fR according to the characters specified in \&\fIcharset\fR. If \fImode\fR is \f(CW\*(C`0\*(C'\fR, this means that \fIs\fR is spanned from left to right starting at \fIs\fR (and ending either when reaching the terminating \f(CW\*(C`NUL\*(C'\fR character or already after \fIn\fR spanned characters) as long as the characters of \fIs\fR are contained in \fIcharset\fR. .Sp Alternatively one can use a \fImode\fR of \f(CW\*(C`STR_COMPLEMENT\*(C'\fR to indicate that \fIs\fR is spanned as long as the characters of \fIs\fR are \fInot\fR contained in \&\fIcharset\fR, i.e., \fIcharset\fR then specifies the complement of the spanning characters. 
.Sp In both cases one can additionally \*(L"or\*(R" (with the C operator ``\f(CW\*(C`|\*(C'\fR'') \&\f(CW\*(C`STR_RIGHT\*(C'\fR into \fImode\fR to indicate that the spanning is done right to left starting at the terminating \f(CW\*(C`NUL\*(C'\fR character of \fIs\fR (and ending either when reaching \fIs\fR or already after \fIn\fR spanned characters). .Ip "char *\fBstr_locate\fR(const char *\fIs\fR, str_size_t \fIn\fR, const char *\fIt\fR);" 4 .IX Item "char *str_locate(const char *s, str_size_t n, const char *t);" This function searches for the (smaller) string \fIt\fR inside (larger) string \&\fIs\fR. If \fIn\fR is not \f(CW\*(C`0\*(C'\fR, the search is performed only inside the first \fIn\fR characters of \fIs\fR. .Ip "char *\fBstr_token\fR(char **\fIs\fR, const char *\fIdelim\fR, const char *\fIquote\fR, const char *\fIcomment\fR, int \fImode\fR);" 4 .IX Item "char *str_token(char **s, const char *delim, const char *quote, const char *comment, int mode);" This function considers the string \fIs\fR to consist of a sequence of zero or more text tokens separated by spans of one or more characters from the separator string \fIdelim\fR. However, text between matched pairs of quotemarks (characters in \fIquote\fR) is treated as plain text, never as delimiter (separator) text. Each call of this function returns a pointer to the first character of the first token of \fIs\fR. The token is \&\f(CW\*(C`NUL\*(C'\fR\-terminated, i.e., the string \fIs\fR is processed in a destructive way. If there are quotation marks or escape sequences, the input string is rewritten with quoted sections and escape sequences properly interpreted. .Sp This function keeps track of its parsing position in the string between separate calls by simply adjusting the caller's \fIs\fR pointer, so that subsequent calls with the same pointer variable \fIs\fR will start processing from the position immediately after the last returned token. 
In this way subsequent calls will work through the string \fIs\fR until no tokens remain. When no token remains in \fIs\fR, \f(CW\*(C`NULL\*(C'\fR is returned. The string of token separators (\fIdelim\fR) and the string of quote characters (\fIquote\fR) may be changed from call to call. .Sp If a character in the string \fIs\fR is not quoted or escaped, and is in the \&\fIcomment\fR set, then it is overwritten with a \f(CW\*(C`NUL\*(C'\fR character and the rest of the string is ignored. The characters to be used as quote characters are specified in the \fIquote\fR set, and must be used in balanced pairs. If there is more than one flavor of quote character, one kind of quote character may be used to quote another kind. If an unbalanced quote is found, the function silently acts as if one had been placed at the end of the input string. The \&\fIdelim\fR and \fIquote\fR strings must be disjoint, i.e., they have to share no characters. .Sp The \fImode\fR argument can be used to modify the processing of the string (default for \fImode\fR is \f(CW\*(C`0\*(C'\fR): \f(CW\*(C`STR_STRIPQUOTES\*(C'\fR forces \fIquote\fR characters to be stripped from quoted tokens; \f(CW\*(C`STR_BACKSLASHESC\*(C'\fR enables the interpretation (and expansion) of backslash escape sequences (`\fB\ex\fR') through \s-1ANSI-C\s0 rules; \f(CW\*(C`STR_SKIPDELIMS\*(C'\fR forces that after the terminating \f(CW\*(C`NUL\*(C'\fR is written and the token returned, further delimiters are skipped (this allows one to make sure that the delimiters for one word don't become part of the next word if one changes delimiters between calls); and \f(CW\*(C`STR_TRIGRAPHS\*(C'\fR enables the recognition and expansion of \s-1ANSI\s0 C Trigraph sequences (as a side effect this enables \&\f(CW\*(C`STR_BACKSLASHESC\*(C'\fR, too). 
.Ip "int \fBstr_parse\fR(const char *\fIs\fR, const char *\fIpop\fR, ...);" 4 .IX Item "int str_parse(const char *s, const char *pop, ...);" This parses the string \fIs\fR according to the parsing operation specified by \fIpop\fR. If the parsing operation succeeds, \f(CW\*(C`TRUE\*(C'\fR is returned. Else \&\f(CW\*(C`FALSE\*(C'\fR is returned. .Sp The \fIpop\fR string usually has one of the following two syntax variants: `\fBm\fR \fIdelim\fR \fIregex\fR \fIdelim\fR \fIflags\fR*' (for matching operations) and `\fBs\fR \fIdelim\fR \fIregex\fR \fIdelim\fR \fIsubst\fR \fIdelim\fR \fIflags\fR*' (for substitution operations). For more details about the syntax variants and semantic of the \fIpop\fR argument see section \fB\s-1GORY\s0 \s-1DETAILS\s0, Parsing Specification\fR below. The syntax of the \fIregex\fR part in \fIpop\fR is mostly equivalent to Perl 5's regular expression syntax. For the complete and gory details see \fIperlre\fR\|(1). A brief summary you can find under section \fB\s-1GORY\s0 \s-1DETAILS\s0, Perl Regular Expressions\fR below. .Ip "int \fBstr_format\fR(char *\fIs\fR, str_size_t \fIn\fR, const char *\fIfmt\fR, ...);" 4 .IX Item "int str_format(char *s, str_size_t n, const char *fmt, ...);" This formats a new string according to \fIfmt\fR and optionally following arguments and writes it into the string \fIs\fR, but never more than \fIn\fR characters at all. It returns the number of written characters. If \fIs\fR is \&\f(CW\*(C`NULL\*(C'\fR it just calculates the number of characters which would be written. .Sp The function generates the output string under the control of the \fIfmt\fR format string that specifies how subsequent arguments (or arguments accessed via the variable-length argument facilities of \fIstdarg\fR\|(3)) are converted for output. 
.Sp The format string \fIfmt\fR is composed of zero or more directives: ordinary characters (not \fB%\fR), which are copied unchanged to the output stream; and conversion specifications, each of which results in fetching zero or more subsequent arguments. Each conversion specification is introduced by the character \fB%\fR. The arguments must correspond properly (after type promotion) with the conversion specifier. The supported conversion specifications are described in detail under \fB\s-1GORY\s0 \&\s-1DETAILS\s0, Format Specification\fR below. .Ip "unsigned long \fBstr_hash\fR(const char *\fIs\fR, str_size_t \fIn\fR, int \fImode\fR);" 4 .IX Item "unsigned long str_hash(const char *s, str_size_t n, int mode);" This function calculates a hash value of string \fIs\fR (or of its first \fIn\fR characters if \fIn\fR is not equal to \f(CW\*(C`0\*(C'\fR). The following hashing functions are supported and can be selected with \fImode\fR: \s-1STR_HASH_DJBX33\s0 (Daniel J. Bernstein, Times 33 Hash with Addition), \s-1STR_HASH_BJDDJ\s0 (Bob Jenkins, Dr. Dobbs Journal), and \s-1STR_HASH_MACRC32\s0 (Mark Adler, Cyclic Redundancy Check with 32\-Bit). This function is intended for fast use in hashing algorithms and \fInot\fR for use as cryptographically strong message digests. .Ip "int \fBstr_base64\fR(char *\fIs\fR, str_size_t \fIn\fR, unsigned char *\fIucp\fR, str_size_t \fIucn\fR, int \fImode\fR);" 4 .IX Item "int str_base64(char *s, str_size_t n, unsigned char *ucp, str_size_t ucn, int mode);" This function Base64 encodes \fIucn\fR bytes starting at \fIucp\fR and writes the resulting string into \fIs\fR (but never more than \fIn\fR characters are written). The \fImode\fR for this operation has to be \f(CW\*(C`STR_BASE64_ENCODE\*(C'\fR. Additionally one can \s-1OR\s0 the value \f(CW\*(C`STR_BASE64_STRICT\*(C'\fR to enable strict encoding where after every 72nd output character a newline character is inserted. The function returns the number of output characters written. 
If \fIs\fR is \f(CW\*(C`NULL\*(C'\fR the function just calculates the number of required output characters. .Sp Alternatively, if \fImode\fR is \f(CW\*(C`STR_BASE64_DECODE\*(C'\fR the string \fIs\fR (or the first \fIn\fR characters only if \fIn\fR is not \f(CW\*(C`0\*(C'\fR) is decoded and the output bytes written at \fIucp\fR. Again, if \fIucp\fR is \f(CW\*(C`NULL\*(C'\fR only the number of required output bytes is calculated. .SH "GORY DETAILS" .IX Header "GORY DETAILS" In this part of the documentation more complex topics are documented in detail. .Sh "Perl Regular Expressions" .IX Subsection "Perl Regular Expressions" The regular expressions used in \fBStr\fR are more or less Perl compatible (they are provided by a stripped down and built-in version of the \&\fI\s-1PCRE\s0\fR library). So the syntax description in \fIperlre\fR\|(1) applies and does not have to be repeated here. For a deeper understanding and details you should have a look at the book `\fIMastering Regular Expressions\fR' (see also the \fIperlbook\fR\|(1) manpage) by \fIJeffrey Friedl\fR. For convenience reasons we give you only a brief summary of Perl compatible regular expressions: .PP The following metacharacters have their standard \fIegrep\fR\|(1) meanings: .PP .Vb 7 \& \e Quote the next metacharacter \& ^ Match the beginning of the line \& . Match any character (except newline) \& $ Match the end of the line (or before newline at the end) \& | Alternation \& () Grouping \& [] Character class .Ve The following standard quantifiers are recognized: .PP .Vb 12 \& * Match 0 or more times (greedy) \& *? Match 0 or more times (non greedy) \& + Match 1 or more times (greedy) \& +? Match 1 or more times (non greedy) \& ? Match 1 or 0 times (greedy) \& ?? Match 1 or 0 times (non greedy) \& {n} Match exactly n times (greedy) \& {n}? Match exactly n times (non greedy) \& {n,} Match at least n times (greedy) \& {n,}? 
Match at least n times (non greedy) \& {n,m} Match at least n but not more than m times (greedy) \& {n,m}? Match at least n but not more than m times (non greedy) .Ve The following backslash sequences are recognized: .PP .Vb 15 \& \et Tab (HT, TAB) \& \en Newline (LF, NL) \& \er Return (CR) \& \ef Form feed (FF) \& \ea Alarm (bell) (BEL) \& \ee Escape (think troff) (ESC) \& \e033 Octal char \& \ex1B Hex char \& \ec[ Control char \& \el Lowercase next char \& \eu Uppercase next char \& \eL Lowercase till \eE \& \eU Uppercase till \eE \& \eE End case modification \& \eQ Quote (disable) pattern metacharacters till \eE .Ve The following non zero-width assertions are recognized: .PP .Vb 6 \& \ew Match a "word" character (alphanumeric plus "_") \& \eW Match a non-word character \& \es Match a whitespace character \& \eS Match a non-whitespace character \& \ed Match a digit character \& \eD Match a non-digit character .Ve The following zero-width assertions are recognized: .PP .Vb 6 \& \eb Match a word boundary \& \eB Match a non-(word boundary) \& \eA Match only at beginning of string \& \eZ Match only at end of string, or before newline at the end \& \ez Match only at end of string \& \eG Match only where previous m//g left off (works only with /g) .Ve The following regular expression extensions are recognized: .PP .Vb 11 \& (?#text) An embedded comment \& (?:pattern) This is for clustering, not capturing (simple) \& (?imsx-imsx:pattern) This is for clustering, not capturing (full) \& (?=pattern) A zero-width positive lookahead assertion \& (?!pattern) A zero-width negative lookahead assertion \& (?<=pattern) A zero-width positive lookbehind assertion \& (?<!pattern) A zero-width negative lookbehind assertion \& (?>pattern) An "independent" subexpression \& (?(cond)yes-re) Conditional expression (simple) \& (?(cond)yes-re|no-re) Conditional expression (full) \& (?imsx-imsx) One or more embedded pattern-match modifiers .Ve .Sh "Parsing Specification" .IX Subsection "Parsing Specification" The \fBstr_parse\fR(const char *\fIs\fR, 
const char *\fIpop\fR, ...) function is a very flexible but complex one. The argument \fIs\fR is the string on which the parsing operation specified by argument \fIpop\fR is applied. The parsing semantics are highly influenced by Perl's `\fB=~\fR' matching operator, because one of the main goals of \fIstr_parse\fR\|(3) is to allow one to rewrite typical Perl matching constructs into C. .PP Now to the gory details. In general, the \fIpop\fR argument of \fIstr_parse\fR\|(3) has one of the following two syntax variants: .Ip "\fBMatching:\fR `\fBm\fR \fIdelim\fR \fIregex\fR \fIdelim\fR \fIflags\fR*':" 4 .IX Item "Matching: `m delim regex delim flags*':" This matches \fIs\fR against the Perl-style regular expression \fIregex\fR under the control of zero or more \fIflags\fR which control the parsing semantics. The stripped down \fIpop\fR syntax `\fIregex\fR' is equivalent to `\fBm/\fR\fIregex\fR\fB/\fR'. .Sp For each grouping pair of parentheses in \fIregex\fR, the text in \fIs\fR which was grouped by the parentheses is extracted into new strings. These per default are allocated as separate strings and returned to the caller through following `\fBchar **\fR' arguments. The caller is required to \fIfree\fR\|(3) them later. .Ip "\fBSubstitution:\fR `\fBs\fR \fIdelim\fR \fIregex\fR \fIdelim\fR \fIsubst\fR \fIdelim\fR \fIflags\fR*':" 4 .IX Item "Substitution: `s delim regex delim subst delim flags*':" This matches \fIs\fR against the Perl-style regular expression \fIregex\fR under the control of zero or more \fIflags\fR which control the parsing semantics. As a result of the operation, a new string is formed which consists of \fIs\fR but with the part which matched \fIregex\fR replaced by \&\fIsubst\fR. The result string is returned to the caller through a `\fBchar **\fR' argument. The caller is required to \fIfree\fR\|(3) this later. 
.Sp For each grouping pair of parentheses in \fIregex\fR, the text in \fIs\fR which was grouped by the parentheses is extracted into new strings and can be referenced for expansion via `\fB$n\fR' (n=1,..) in \fIsubst\fR. Additionally any \fIstr_format\fR\|(3) style `\fB%\fR' constructs in \fIsubst\fR are expanded through additional caller supplied arguments. .PP The following \fIflags\fR are supported: .Ip "\fBb\fR" 4 .IX Item "b" If the \fIbundle\fR flag `\fBb\fR' is specified, the extracted strings are bundled together into a single chunk of memory and its address is returned to the caller with an additional `\fBchar **\fR' argument which has to precede the regular string arguments. The caller then has to \fIfree\fR\|(3) only this chunk of memory in order to free all extracted strings at once. .Ip "\fBi\fR" 4 .IX Item "i" If the case-\fIinsensitive\fR flag `\fBi\fR' is specified, \fIregex\fR is matched in a case-insensitive way. .Ip "\fBo\fR" 4 .IX Item "o" If the \fIonce\fR flag `\fBo\fR' is specified, this indicates to the \fBStr\fR library that the whole \fIpop\fR string is constant and that its internal pre-processing (it is compiled into a deterministic finite automaton (\s-1DFA\s0) internally) has to be done only once (the \fBStr\fR library then caches the \s-1DFA\s0 which corresponds to the \fIpop\fR argument). .Ip "\fBx\fR" 4 .IX Item "x" If the \fIextended\fR flag `\fBx\fR' is specified, the \fIregex\fR's legibility is extended by permitting embedded whitespace and comments to allow one to write down complex regular expressions more clearly and even in a documented way. .Ip "\fBm\fR" 4 .IX Item "m" If the \fImultiple\fR lines flag `\fBm\fR' is specified, the string \fIs\fR is treated as multiple lines. That is, this changes the regular expression meta characters `\fB^\fR' and `\fB$\fR' from matching at only the very start or end of the string \fIs\fR to the start or end of any line anywhere within the string \fIs\fR. 
.Ip "\fBs\fR" 4 .IX Item "s" If the \fIsingle\fR line flag `\fBs\fR' is specified, the string \fIs\fR is treated as single line. That is, this changes the regular expression meta character `\fB.\fR' to match any character whatsoever, even a newline, which it normally would not match. .SH "CONVERSION SPECIFICATION" .IX Header "CONVERSION SPECIFICATION" In the format string of \fIstr_format\fR\|(3) each conversion specification is introduced by the character \fB%\fR. After the \fB%\fR, the following appear in sequence: .Ip "o" 4 An optional field, consisting of a decimal digit string followed by a \fB$\fR, specifying the next argument to access. If this field is not provided, the argument following the last argument accessed will be used. Arguments are numbered starting at \fB1\fR. If unaccessed arguments in the format string are interspersed with ones that are accessed the results will be indeterminate. .Ip "o" 4 Zero or more of the following flags: .Sp A \fB#\fR character specifying that the value should be converted to an ``alternate form''. For \fBc\fR, \fBd\fR, \fBi\fR, \fBn\fR, \fBp\fR, \fBs\fR, and \fBu\fR, conversions, this option has no effect. For \fBo\fR conversions, the precision of the number is increased to force the first character of the output string to a zero (except if a zero value is printed with an explicit precision of zero). For \fBx\fR and \fBX\fR conversions, a non-zero result has the string \fB0x\fR (or \fB0X\fR for \fBX\fR conversions) prepended to it. For \fBe\fR, \fBE\fR, \fBf\fR, \fBg\fR, and \fBG\fR, conversions, the result will always contain a decimal point, even if no digits follow it (normally, a decimal point appears in the results of those conversions only if a digit follows). For \fBg\fR and \fBG\fR conversions, trailing zeros are not removed from the result as they would otherwise be. .Sp A zero `\fB0\fR' character specifying zero padding. 
For all conversions except \&\fBn\fR, the converted value is padded on the left with zeros rather than blanks. If a precision is given with a numeric conversion (\fBd\fR, \fBi\fR, \fBo\fR, \fBu\fR, \&\fBi\fR, \fBx\fR, and \fBX\fR), the `\fB0\fR' flag is ignored. .Sp A negative field width flag `\fB-\fR' indicates the converted value is to be left adjusted on the field boundary. Except for \fBn\fR conversions, the converted value is padded on the right with blanks, rather than on the left with blanks or zeros. A `\fB-\fR' overrides a `\fB0\fR' if both are given. .Sp A space, specifying that a blank should be left before a positive number produced by a signed conversion (\fBd\fR, \fBe\fR, \fBE\fR, \fBf\fR, \fBg\fR, \fBG\fR, or \fBi\fR). .Sp A `\fB+\fR' character specifying that a sign always be placed before a number produced by a signed conversion. A `\fB+\fR' overrides a space if both are used. .Ip "o" 4 An optional decimal digit string specifying a minimum field width. If the converted value has fewer characters than the field width, it will be padded with spaces on the left (or right, if the left-adjustment flag has been given) to fill out the field width. .Ip "o" 4 An optional precision, in the form of a period `\fB.\fR' followed by an optional digit string. If the digit string is omitted, the precision is taken as zero. This gives the minimum number of digits to appear for \&\fBd\fR, \fBi\fR, \fBo\fR, \fBu\fR, \fBx\fR, and \fBX\fR conversions, the number of digits to appear after the decimal-point for \fBe\fR, \fBE\fR, and \fBf\fR conversions, the maximum number of significant digits for \fBg\fR and \fBG\fR conversions, or the maximum number of characters to be printed from a string for \fBs\fR conversions. 
.Ip "o" 4 The optional character \fBh\fR, specifying that a following \fBd\fR, \fBi\fR, \fBo\fR, \&\fBu\fR, \fBx\fR, or \fBX\fR conversion corresponds to a `\f(CW\*(C`short int\*(C'\fR' or `\f(CW\*(C`unsigned short int\*(C'\fR' argument, or that a following \fBn\fR conversion corresponds to a pointer to a `\f(CW\*(C`short int\*(C'\fR' argument. .Ip "o" 4 The optional character \fBl\fR (ell) specifying that a following \fBd\fR, \fBi\fR, \&\fBo\fR, \fBu\fR, \fBx\fR, or \fBX\fR conversion corresponds to a `\f(CW\*(C`long int\*(C'\fR' or `\f(CW\*(C`unsigned long int\*(C'\fR' argument, or that a following \fBn\fR conversion corresponds to a pointer to a `\f(CW\*(C`long int\*(C'\fR' argument. .Ip "o" 4 The optional character \fBq\fR, specifying that a following \fBd\fR, \fBi\fR, \fBo\fR, \&\fBu\fR, \fBx\fR, or \fBX\fR conversion corresponds to a `\f(CW\*(C`quad int\*(C'\fR' or `\f(CW\*(C`unsigned quad int\*(C'\fR' argument, or that a following \fBn\fR conversion corresponds to a pointer to a `\f(CW\*(C`quad int\*(C'\fR' argument. .Ip "o" 4 The character \fBL\fR specifying that a following \fBe\fR, \fBE\fR, \fBf\fR, \fBg\fR, or \fBG\fR conversion corresponds to a `\f(CW\*(C`long double\*(C'\fR' argument. .Ip "o" 4 A character that specifies the type of conversion to be applied. .PP A field width or precision, or both, may be indicated by an asterisk `\fB*\fR' or an asterisk followed by one or more decimal digits and a `\fB$\fR' instead of a digit string. In this case, an `\f(CW\*(C`int\*(C'\fR' argument supplies the field width or precision. A negative field width is treated as a left adjustment flag followed by a positive field width; a negative precision is treated as though it were missing. If a single format directive mixes positional (`\fBnn$\fR') and non-positional arguments, the results are undefined. 
.PP The conversion specifiers and their meanings are: .Ip "\fBdiouxX\fR" 4 .IX Item "diouxX" The `\f(CW\*(C`int\*(C'\fR' (or appropriate variant) argument is converted to signed decimal (\fBd\fR and \fBi\fR), unsigned octal (\fBo\fR), unsigned decimal (\fBu\fR), or unsigned hexadecimal (\fBx\fR and \fBX\fR) notation. The letters \fBabcdef\fR are used for \fBx\fR conversions; the letters \fB\s-1ABCDEF\s0\fR are used for \fBX\fR conversions. The precision, if any, gives the minimum number of digits that must appear; if the converted value requires fewer digits, it is padded on the left with zeros. .Ip "\fB\s-1DOU\s0\fR" 4 .IX Item "DOU" The `\f(CW\*(C`long int\*(C'\fR' argument is converted to signed decimal, unsigned octal, or unsigned decimal, as if the format had been \fBld\fR, \fBlo\fR, or \fBlu\fR respectively. These conversion characters are deprecated, and will eventually disappear. .Ip "\fBeE\fR" 4 .IX Item "eE" The `\f(CW\*(C`double\*(C'\fR' argument is rounded and converted in the style `[\-]d.ddd\fBe\fR+\-dd' where there is one digit before the decimal-point character and the number of digits after it is equal to the precision; if the precision is missing, it is taken as 6; if the precision is zero, no decimal-point character appears. An \fBE\fR conversion uses the letter \fBE\fR (rather than \fBe\fR) to introduce the exponent. The exponent always contains at least two digits; if the value is zero, the exponent is 00. .Ip "\fBf\fR" 4 .IX Item "f" The `\f(CW\*(C`double\*(C'\fR' argument is rounded and converted to decimal notation in the style `[\-]ddd.ddd' where the number of digits after the decimal-point character is equal to the precision specification. If the precision is missing, it is taken as 6; if the precision is explicitly zero, no decimal-point character appears. If a decimal point appears, at least one digit appears before it. 
.Ip "\fBg\fR" 4 .IX Item "g" The `\f(CW\*(C`double\*(C'\fR' argument is converted in style \fBf\fR or \fBe\fR (or \fBE\fR for \fBG\fR conversions). The precision specifies the number of significant digits. If the precision is missing, 6 digits are given; if the precision is zero, it is treated as 1. Style \fBe\fR is used if the exponent from its conversion is less than \-4 or greater than or equal to the precision. Trailing zeros are removed from the fractional part of the result; a decimal point appears only if it is followed by at least one digit. .Ip "\fBc\fR" 4 .IX Item "c" The `\f(CW\*(C`int\*(C'\fR' argument is converted to an `\f(CW\*(C`unsigned char\*(C'\fR', and the resulting character is written. .Ip "\fBs\fR" 4 .IX Item "s" The `\f(CW\*(C`char *\*(C'\fR' argument is expected to be a pointer to an array of character type (pointer to a string). Characters from the array are written up to (but not including) a terminating \f(CW\*(C`NUL\*(C'\fR character; if a precision is specified, no more than the number specified are written. If a precision is given, no null character need be present; if the precision is not specified, or is greater than the size of the array, the array must contain a terminating \f(CW\*(C`NUL\*(C'\fR character. .Ip "\fBp\fR" 4 .IX Item "p" The `\f(CW\*(C`void *\*(C'\fR' pointer argument is printed in hexadecimal (as if by `\fB%#x\fR' or `\f(CW\*(C`%#lx\*(C'\fR'). .Ip "\fBn\fR" 4 .IX Item "n" The number of characters written so far is stored into the integer indicated by the `\f(CW\*(C`int *\*(C'\fR' (or variant) pointer argument. No argument is converted. .Ip "\fB%\fR" 4 .IX Item "%" A `\fB%\fR' is written. No argument is converted. The complete conversion specification is `\fB%%\fR'. .PP In no case does a non-existent or small field width cause truncation of a field; if the result of a conversion is wider than the field width, the field is expanded to contain the conversion result. 
.SH "EXAMPLES" .IX Header "EXAMPLES" In the following a few snippets of selected use cases of \fBStr\fR are presented: .Ip "\fBSplice a String into Another\fR" 4 .IX Item "Splice a String into Another" .Vb 5 \& char *v1 = "foo bar quux"; \& char *v2 = "baz"; \& str_splice(v1, 3, 5, v2, 0); \& /* now we have v1 = "foobazquux" */ \& .... .Ve .Ip "\fBTokenize a String\fR" 4 .IX Item "Tokenize a String" .Vb 10 \& char *var = " foo \et \e" bar 'baz'\e" q'uu'x #comment"; \& char *tok, *p; \& p = var; \& while ((tok = str_token(p, ":", "\e"'", "#", 0)) != NULL) { \& /* here we enter three times: \& 1. tok = "foo" \& 2. tok = " bar 'baz'" \& 3. tok = "quux" */ \& ... \& } .Ve .Ip "\fBMatch a String\fR" 4 .IX Item "Match a String" .Vb 5 \& char *var = "foo:bar"; \& if (str_parse(var, "m/^.+?:.+$/")) { \& /* var matched */ \& ... \& } .Ve .Ip "\fBMatch a String and Go Ahead with Details\fR" 4 .IX Item "Match a String and Go Ahead with Details" .Vb 10 \& char *var = "foo:bar"; \& char *cp, *v1, *v2; \& if (str_parse(var, "m/^(.+?):(.+)$/b", &cp, &v1, &v2)) { \& ... \& /* now we have: \& cp = "foo\e0bar\e0" and v1 and v2 pointing \& into it, i.e., v1 = "foo", v2 = "bar" */ \& ... \& free(cp); \& } .Ve .Ip "\fBSubstitute Text in a String\fR" 4 .IX Item "Substitute Text in a String" .Vb 8 \& char *var = "foo:bar"; \& char *subst = "quux"; \& char *new; \& str_parse(var, "s/^(.+?):(.+)$/$1-%s-$2/", &new, subst); \& ... \& /* now we have: var = "foo:bar", new = "foo-quux-bar" */ \& ... \& free(new); .Ve .Ip "\fBFormat a String\fR" 4 .IX Item "Format a String" .Vb 6 \& char *v0 = "abc..."; /* length not guessable */ \& char *v1 = "foo"; \& void *v2 = 0xDEAD; \& int v3 = 42; \& char *cp; \& int n; .Ve .Vb 6 \& n = str_format(NULL, 0, "%s|%5s-%x-%04d", v0, v1, v2, v3); \& cp = malloc(n); \& str_format(cp, n, "%s|%5s-%x-%04d", v0, v1, v2, v3); \& /* now we have cp = "abc...| foo-dead-0042" */ \& ... 
\& free(cp); .Ve .SH "SEE ALSO" .IX Header "SEE ALSO" \&\fIstring\fR\|(3), \fIprintf\fR\|(3), \fIperlre\fR\|(1). .SH "HISTORY" .IX Header "HISTORY" The \fBStr\fR library was written in November and December 1999 by Ralf S. Engelschall. As building blocks various existing code was used and recycled: for the \fIstr_token\fR\|(3) implementation an ancient \fIstrtok\fR\|(3) flavor from William Deich 1991 was cleaned up and adjusted. As the background parsing engine for \fIstr_parse\fR\|(3) a heavily stripped down version of Philip Hazel's \s-1PCRE\s0 2.08 library was used. The \fIstr_format\fR\|(3) implementation was based on Panos Tsirigotis' \fIsprintf\fR\|(3) code as adjusted by the Apache Software Foundation 1998. The formatting engine was stripped down and enhanced to support internal extensions which were required by \fIstr_format\fR\|(3) and \fIstr_parse\fR\|(3). .SH "AUTHOR" .IX Header "AUTHOR" .Vb 3 \& Ralf S. Engelschall \& rse@engelschall.com \& www.engelschall.com .Ve