OSSP: CVS Repository: ossp-pkg/str/str

ossp-pkg/str/str_base64.c 1.8
/*
**  OSSP str - String Handling
**  Copyright (c) 1999-2005 Ralf S. Engelschall <rse@engelschall.com>
**  Copyright (c) 1999-2005 The OSSP Project <http://www.ossp.org/>
**
**  This file is part of OSSP str, a string handling and manipulation
**  library which can be found at http://www.ossp.org/pkg/lib/str/.
**
**  Permission to use, copy, modify, and distribute this software for
**  any purpose with or without fee is hereby granted, provided that
**  the above copyright notice and this permission notice appear in all
**  copies.
**
**  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
**  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
**  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
**  IN NO EVENT SHALL THE AUTHORS AND COPYRIGHT HOLDERS AND THEIR
**  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
**  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
**  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
**  USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
**  ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
**  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
**  OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
**  SUCH DAMAGE.
**
**  str_base64.c: Base64 encoding and decoding (binary to text and vice versa)
*/

#include "str_p.h"

/*
 * Copyright (c) 1996, 1998 by Internet Software Consortium.
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
 * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
 * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 * SOFTWARE.
 */

/*
 * Portions Copyright (c) 1995 by International Business Machines, Inc.
 *
 * International Business Machines, Inc. (hereinafter called IBM) grants
 * permission under its copyrights to use, copy, modify, and distribute this
 * Software with or without fee, provided that the above copyright notice and
 * all paragraphs of this notice appear in all copies, and that the name of IBM
 * not be used in connection with the marketing of any product incorporating
 * the Software or modifications thereof, without specific, written prior
 * permission.
 *
 * To the extent it has a right to do so, IBM grants an immunity from suit
 * under its patents, if any, for the use, sale or manufacture of products to
 * the extent that such products are used for performing Domain Name System
 * dynamic updates in TCP/IP networks by means of the Software.  No immunity is
 * granted for any product per se or for any other function of any product.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", AND IBM DISCLAIMS ALL WARRANTIES,
 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE.  IN NO EVENT SHALL IBM BE LIABLE FOR ANY SPECIAL,
 * DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER ARISING
 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE, EVEN
 * IF IBM IS APPRISED OF THE POSSIBILITY OF SUCH DAMAGES.
 */

#include <string.h>
#include <ctype.h>

/*
 * Encoding (Base256->Base64):
 * Base256: 3 bytes, |11111111| |22222222| |33333333|
 * Base64:  4 bytes, |XX111111| |XX112222| |XX222233| |XX333333|
 *
 * Decoding (Base64->Base256):
 * Base64:  4 bytes, |XX111111| |XX222222| |XX333333| |XX444444|
 * Base256: 3 bytes, |11111122| |22223333| |33444444|
 *
 *                               -
 *
 * The following encoding technique is taken from RFC 1521 by Borenstein
 * and Freed. It is reproduced here in a slightly edited form for
 * convenience.
 *
 * A 65-character subset of US-ASCII is used, enabling 6 bits to be
 * represented per printable character. (The extra 65th character, "=",
 * is used to signify a special processing function.)
 *
 * The encoding process represents 24-bit groups of input bits as output
 * strings of 4 encoded characters. Proceeding from left to right, a
 * 24-bit input group is formed by concatenating 3 8-bit input groups.
 * These 24 bits are then treated as 4 concatenated 6-bit groups, each
 * of which is translated into a single digit in the base64 alphabet.
 *
 * Each 6-bit group is used as an index into an array of 64 printable
 * characters. The character referenced by the index is placed in the
 * output string.
 *
 *                       Table 1: The Base64 Alphabet
 *
 *    Value Encoding  Value Encoding  Value Encoding  Value Encoding
 *        0 A            17 R            34 i            51 z
 *        1 B            18 S            35 j            52 0
 *        2 C            19 T            36 k            53 1
 *        3 D            20 U            37 l            54 2
 *        4 E            21 V            38 m            55 3
 *        5 F            22 W            39 n            56 4
 *        6 G            23 X            40 o            57 5
 *        7 H            24 Y            41 p            58 6
 *        8 I            25 Z            42 q            59 7
 *        9 J            26 a            43 r            60 8
 *       10 K            27 b            44 s            61 9
 *       11 L            28 c            45 t            62 +
 *       12 M            29 d            46 u            63 /
 *       13 N            30 e            47 v
 *       14 O            31 f            48 w         (pad) =
 *       15 P            32 g            49 x
 *       16 Q            33 h            50 y
 *
 * Special processing is performed if fewer than 24 bits are available
 * at the end of the data being encoded.  A full encoding quantum is
 * always completed at the end of a quantity.  When fewer than 24 input
 * bits are available in an input group, zero bits are added (on the
 * right) to form an integral number of 6-bit groups.  Padding at the
 * end of the data is performed using the '=' character.
 *
 * Since all base64 input is an integral number of octets, only the
 * following cases can arise:
 *
 * (1) the final quantum of encoding input is an integral
 *     multiple of 24 bits; here, the final unit of encoded
 *     output will be an integral multiple of 4 characters
 *     with no "=" padding,
 * (2) the final quantum of encoding input is exactly 16 bits;
 *     here, the final unit of encoded output will be three
 *     characters followed by one "=" padding character, or
 * (3) the final quantum of encoding input is exactly 8 bits;
 *     here, the final unit of encoded output will be two
 *     characters followed by two "=" padding characters.
 */

/* the Base64 alphabet: the symbol for a 6-bit value v is Base64[v] */
static const char Base64[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789"
    "+/";

/* the 65th character, used to pad an incomplete final output quantum */
static const char Pad64 = '=';

/*
 * encoding: binary -> base64
 *
 * Encodes the srclen bytes at src into dst as NUL-terminated Base64 text.
 *
 *   dst, dstlen  output buffer and its capacity in bytes. The capacity has
 *                to cover the encoded text plus the terminating NUL. If
 *                dst is NULL nothing is written and only the length of the
 *                encoded text (not counting the NUL) is calculated.
 *   src, srclen  input bytes; an empty input (srclen == 0) is an error.
 *   strict       if non-zero, a newline is emitted after every 72 output
 *                characters that stem from complete 3-byte input groups.
 *
 * Returns the number of characters stored in dst (not counting the NUL),
 * or -1 if the input is empty or dst is too small.
 */
static int
base64_encode(
    char *dst, str_size_t dstlen,
    unsigned char const *src, str_size_t srclen,
    int strict)
{
    str_size_t dstpos;
    unsigned char input[3];
    unsigned char output[4];
    int ocnt;
    str_size_t i;

    if (srclen == 0)
        return -1;
    if (dst == NULL) {
        /* just calculate required length of dst */
        dstlen = (((srclen + 2) / 3) * 4);
        if (strict)
            dstlen += (dstlen / 72);
        return dstlen;
    }

    /* bulk encoding of all complete 3-byte groups */
    dstpos = 0;
    ocnt = 0;
    while (srclen >= 3) {
        input[0] = *src++;
        input[1] = *src++;
        input[2] = *src++;
        srclen -= 3;

        /* regroup 3 x 8 bits into 4 x 6 bits */
        output[0] = (input[0] >> 2);
        output[1] = ((input[0] & 0x03) << 4) + (input[1] >> 4);
        output[2] = ((input[1] & 0x0f) << 2) + (input[2] >> 6);
        output[3] = (input[2] & 0x3f);

        if (dstpos + 4 > dstlen)
            return -1;
        dst[dstpos++] = Base64[output[0]];
        dst[dstpos++] = Base64[output[1]];
        dst[dstpos++] = Base64[output[2]];
        dst[dstpos++] = Base64[output[3]];

        /* in strict mode break the line after 72/4 groups (72 characters) */
        if (strict && ++ocnt == (72/4)) {
            ocnt = 0;
            /*
             * The capacity check above covers the four symbols only,
             * so the line break needs a check of its own. Without it
             * the newline would land one byte behind a buffer that
             * ends exactly at the end of a line.
             */
            if (dstpos >= dstlen)
                return -1;
            dst[dstpos++] = '\n';
        }
    }

    /* now worry about padding with remaining 1 or 2 bytes */
    if (srclen != 0) {
        /* missing input bytes count as zero bits */
        input[0] = input[1] = input[2] = NUL;
        for (i = 0; i < srclen; i++)
            input[i] = *src++;

        output[0] = (input[0] >> 2);
        output[1] = ((input[0] & 0x03) << 4) + (input[1] >> 4);
        output[2] = ((input[1] & 0x0f) << 2) + (input[2] >> 6);

        if (dstpos + 4 > dstlen)
            return -1;
        dst[dstpos++] = Base64[output[0]];
        dst[dstpos++] = Base64[output[1]];
        /* one input byte: "xx==", two input bytes: "xxx=" */
        if (srclen == 1)
            dst[dstpos++] = Pad64;
        else
            dst[dstpos++] = Base64[output[2]];
        dst[dstpos++] = Pad64;
    }

    /* room for the terminating NUL is mandatory */
    if (dstpos >= dstlen)
        return -1;
    dst[dstpos] = NUL;

    return dstpos;
}

/*
 * decoding: base64 -> binary
 *
 * Decodes the Base64 text at src into the byte buffer dst.
 *
 *   dst, dstlen  output buffer and its capacity in bytes. If dst is NULL
 *                nothing is stored and only the number of decoded bytes
 *                is counted.
 *   src, srclen  Base64 text and its length in characters. If srclen is 0
 *                the length is taken with strlen(), so src has to be
 *                NUL-terminated in that case.
 *
 * ASCII whitespace is skipped anywhere in the input. Decoding stops at the
 * first '=' character; after it only the remaining padding and whitespace
 * are accepted.
 *
 * Returns the number of decoded bytes, or -1 if
 *   - a character outside the Base64 alphabet (including a NUL byte) occurs,
 *   - dst is too small,
 *   - the padding is misplaced or incomplete or followed by anything but
 *     whitespace,
 *   - the input ends in the middle of a 4-character quantum, or
 *   - (only when dst is not NULL) the unused bits in front of the padding
 *     are not zero.
 */
static int
base64_decode(
    unsigned char *dst, str_size_t dstlen,
    char const *src, str_size_t srclen)
{
    int dstidx, state, ch;
    int val;                    /* 6-bit value of the current symbol */
    int padded;                 /* set once the first '=' was seen */
    int second;                 /* set once the second '=' was seen */
    unsigned char res;          /* bits carried over into the next byte */
    char const *pos;

    if (srclen == 0)
        srclen = strlen(src);
    state = 0;
    dstidx = 0;
    res = 0;
    padded = 0;

    /*
     * srclen is decremented only after a character was really taken,
     * so it always holds the exact number of unread characters and
     * never wraps around below zero.
     */
    while (srclen > 0) {
        ch = *src++;
        srclen--;
        if (isascii(ch) && isspace(ch)) /* Skip whitespace anywhere */
            continue;
        if (ch == Pad64) {
            padded = 1;
            break;
        }
        /*
         * strchr() also matches the terminating NUL of the alphabet,
         * which would turn a NUL byte in the input into the bogus
         * value 64; hence it is rejected explicitly.
         */
        pos = (ch != '\0' ? strchr(Base64, ch) : NULL);
        if (pos == NULL)        /* A non-base64 character */
            return -1;
        val = (int)(pos - Base64);
        switch (state) {
            case 0:             /* 1st symbol: upper 6 bits of byte 1 */
                if (dst != NULL) {
                    if ((str_size_t)dstidx >= dstlen)
                        return -1;
                    dst[dstidx] = (unsigned char)(val << 2);
                }
                state = 1;
                break;
            case 1:             /* 2nd symbol: completes byte 1 */
                if (dst != NULL) {
                    if ((str_size_t)dstidx >= dstlen)
                        return -1;
                    dst[dstidx] |= (unsigned char)(val >> 4);
                    res = (unsigned char)((val & 0x0f) << 4);
                }
                dstidx++;
                state = 2;
                break;
            case 2:             /* 3rd symbol: completes byte 2 */
                if (dst != NULL) {
                    if ((str_size_t)dstidx >= dstlen)
                        return -1;
                    dst[dstidx] = (unsigned char)(res | (val >> 2));
                    res = (unsigned char)((val & 0x03) << 6);
                }
                dstidx++;
                state = 3;
                break;
            case 3:             /* 4th symbol: completes byte 3 */
                if (dst != NULL) {
                    if ((str_size_t)dstidx >= dstlen)
                        return -1;
                    dst[dstidx] = (unsigned char)(res | val);
                }
                dstidx++;
                state = 0;
                break;
            default:
                break;
        }
    }

    /*
     * We are done decoding Base-64 chars.  Let's see if we ended
     * on a byte boundary, and/or with erroneous trailing characters.
     */

    if (padded) {               /* We got a pad char. */
        switch (state) {
            case 0:             /* Invalid = in first position */
            case 1:             /* Invalid = in second position */
                return -1;
            case 2:             /* Valid, means one byte of info */
                /*
                 * Skip any number of spaces and make sure there is
                 * another trailing = sign.  Only a character really
                 * read here counts; the first = must not be taken
                 * for the second one when the input ends early.
                 */
                second = 0;
                while (srclen > 0) {
                    ch = *src++;
                    srclen--;
                    if (isascii(ch) && isspace(ch))
                        continue;
                    second = (ch == Pad64);
                    break;
                }
                if (!second)
                    return -1;
                /* FALLTHROUGH */
            case 3:             /* Valid, means two bytes of info */
                /*
                 * We know the last char was an =.  Is there anything
                 * but whitespace after it?
                 */
                while (srclen > 0) {
                    ch = *src++;
                    srclen--;
                    if (!(isascii(ch) && isspace(ch)))
                        return -1;
                }
                /*
                 * Now make sure for cases 2 and 3 that the "extra"
                 * bits that slopped past the last full byte were
                 * zeros.  If we don't check them, they become a
                 * subliminal channel.
                 */
                if (dst != NULL && res != 0)
                    return -1;
                break;
            default:
                break;
        }
    }
    else {
        /*
         * We ended by seeing the end of the string.  Make sure we
         * have no partial bytes lying around.
         */
        if (state != 0)
            return -1;
    }

    return dstidx;
}

/*
 * The API Function
 *
 * Dispatches to the encoder or the decoder according to mode:
 *
 *   STR_BASE64_ENCODE  encode the ulen bytes at ucp into the text
 *                      buffer s of size n; STR_BASE64_STRICT in mode
 *                      additionally selects the strict encoding.
 *   STR_BASE64_DECODE  decode the text at s (length n) into the byte
 *                      buffer ucp of size ulen.
 *
 * If both flags are set, encoding wins.  Returns whatever the selected
 * worker returns, or -1 if mode names neither operation.
 */
int str_base64(char *s, str_size_t n, unsigned char *ucp, str_size_t ulen, int mode)
{
    if (mode & STR_BASE64_ENCODE) {
        int strict = ((mode & STR_BASE64_STRICT) ? TRUE : FALSE);

        return base64_encode(s, n, ucp, ulen, strict);
    }

    if (mode & STR_BASE64_DECODE)
        return base64_decode(ucp, ulen, s, n);

    /* neither encoding nor decoding was requested */
    return -1;
}
OSSP CVS Repository