ossp-pkg/str/str_base64.c
/*
** OSSP str - String Handling
** Copyright (c) 1999-2005 Ralf S. Engelschall <rse@engelschall.com>
** Copyright (c) 1999-2005 The OSSP Project <http://www.ossp.org/>
**
** This file is part of OSSP str, a string handling and manipulation
** library which can be found at http://www.ossp.org/pkg/lib/str/.
**
** Permission to use, copy, modify, and distribute this software for
** any purpose with or without fee is hereby granted, provided that
** the above copyright notice and this permission notice appear in all
** copies.
**
** THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
** WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
** MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
** IN NO EVENT SHALL THE AUTHORS AND COPYRIGHT HOLDERS AND THEIR
** CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
** SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
** LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
** USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
** ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
** OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
** OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
** SUCH DAMAGE.
**
** str_base64.c: Base64 encoding of bytes to strings and vice versa
*/
#include "str_p.h"
/*
* Copyright (c) 1996, 1998 by Internet Software Consortium.
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
* ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
* CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
* DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
* PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
* SOFTWARE.
*/
/*
* Portions Copyright (c) 1995 by International Business Machines, Inc.
*
* International Business Machines, Inc. (hereinafter called IBM) grants
* permission under its copyrights to use, copy, modify, and distribute this
* Software with or without fee, provided that the above copyright notice and
* all paragraphs of this notice appear in all copies, and that the name of IBM
* not be used in connection with the marketing of any product incorporating
* the Software or modifications thereof, without specific, written prior
* permission.
*
* To the extent it has a right to do so, IBM grants an immunity from suit
* under its patents, if any, for the use, sale or manufacture of products to
* the extent that such products are used for performing Domain Name System
* dynamic updates in TCP/IP networks by means of the Software. No immunity is
* granted for any product per se or for any other function of any product.
*
* THE SOFTWARE IS PROVIDED "AS IS", AND IBM DISCLAIMS ALL WARRANTIES,
* INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
* PARTICULAR PURPOSE. IN NO EVENT SHALL IBM BE LIABLE FOR ANY SPECIAL,
* DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER ARISING
* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE, EVEN
* IF IBM IS APPRISED OF THE POSSIBILITY OF SUCH DAMAGES.
*/
#include <string.h>
#include <ctype.h>
/*
* Encoding (Base256->Base64):
* Base256: 3 bytes, |11111111| |22222222| |33333333|
* Base64: 4 bytes, |XX111111| |XX112222| |XX222233| |XX333333|
*
* Decoding (Base64->Base256):
* Base64: 4 bytes, |XX111111| |XX222222| |XX333333| |XX444444|
* Base256: 3 bytes, |11111122| |22223333| |33444444|
*
* -
*
* The following encoding technique is taken from RFC 1521 by Borenstein
* and Freed. It is reproduced here in a slightly edited form for
* convenience.
*
* A 65-character subset of US-ASCII is used, enabling 6 bits to be
* represented per printable character. (The extra 65th character, "=",
* is used to signify a special processing function.)
*
* The encoding process represents 24-bit groups of input bits as output
* strings of 4 encoded characters. Proceeding from left to right, a
* 24-bit input group is formed by concatenating 3 8-bit input groups.
* These 24 bits are then treated as 4 concatenated 6-bit groups, each
* of which is translated into a single digit in the base64 alphabet.
*
* Each 6-bit group is used as an index into an array of 64 printable
* characters. The character referenced by the index is placed in the
* output string.
*
* Table 1: The Base64 Alphabet
*
* Value Encoding Value Encoding Value Encoding Value Encoding
* 0 A 17 R 34 i 51 z
* 1 B 18 S 35 j 52 0
* 2 C 19 T 36 k 53 1
* 3 D 20 U 37 l 54 2
* 4 E 21 V 38 m 55 3
* 5 F 22 W 39 n 56 4
* 6 G 23 X 40 o 57 5
* 7 H 24 Y 41 p 58 6
* 8 I 25 Z 42 q 59 7
* 9 J 26 a 43 r 60 8
* 10 K 27 b 44 s 61 9
* 11 L 28 c 45 t 62 +
* 12 M 29 d 46 u 63 /
* 13 N 30 e 47 v
* 14 O 31 f 48 w (pad) =
* 15 P 32 g 49 x
* 16 Q 33 h 50 y
*
* Special processing is performed if fewer than 24 bits are available
* at the end of the data being encoded. A full encoding quantum is
* always completed at the end of a quantity. When fewer than 24 input
* bits are available in an input group, zero bits are added (on the
* right) to form an integral number of 6-bit groups. Padding at the
* end of the data is performed using the '=' character.
*
* Since all base64 input is an integral number of octets, only the
* following cases can arise:
*
* (1) the final quantum of encoding input is an integral
* multiple of 24 bits; here, the final unit of encoded
* output will be an integral multiple of 4 characters
* with no "=" padding,
* (2) the final quantum of encoding input is exactly 16 bits;
* here, the final unit of encoded output will be three
* characters followed by one "=" padding character, or
* (3) the final quantum of encoding input is exactly 8 bits;
* here, the final unit of encoded output will be two
* characters followed by two "=" padding characters.
*/
/*
 * The Base64 alphabet: the character at index i encodes the 6-bit
 * value i (see "Table 1" in the comment above). The encoder indexes
 * into this array; the decoder searches it with strchr() and uses the
 * offset of the match as the decoded value.
 */
static const char Base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
/* Character used to pad the final output quantum to four characters. */
static const char Pad64 = '=';
/*
 * base64_encode - encoding: binary -> base64
 *
 *   dst     output buffer, or NULL to only query the output length
 *   dstlen  capacity of dst in bytes (ignored when dst is NULL)
 *   src     input bytes
 *   srclen  number of input bytes; a length of 0 is rejected
 *   strict  if non-zero, a '\n' is emitted after every 72 output
 *           characters (i.e. after every 18 complete input triples)
 *
 * Returns the number of characters stored in dst, not counting the
 * terminating NUL which is always appended, or -1 if srclen is 0 or
 * dst is too small for the text plus the NUL.
 *
 * With dst == NULL nothing is written and the computed text length is
 * returned instead. That value does not include the terminating NUL,
 * so the buffer handed to the real call must be at least one byte
 * larger. In strict mode the estimate can be one larger than what is
 * actually produced, because no line break is emitted after a final
 * padded quantum.
 */
static int
base64_encode(
    char *dst, str_size_t dstlen,
    unsigned char const *src, str_size_t srclen,
    int strict)
{
    str_size_t dstpos;
    unsigned char input[3];
    unsigned char output[4];
    int ocnt;
    str_size_t i;

    if (srclen == 0)
        return -1;
    if (dst == NULL) {
        /* just calculate required length of dst */
        dstlen = (((srclen + 2) / 3) * 4);
        if (strict)
            dstlen += (dstlen / 72);
        return (int)dstlen;
    }

    /* bulk encoding of all complete 3-byte groups */
    dstpos = 0;
    ocnt = 0;
    while (srclen >= 3) {
        input[0] = *src++;
        input[1] = *src++;
        input[2] = *src++;
        srclen -= 3;

        output[0] = (input[0] >> 2);
        output[1] = ((input[0] & 0x03) << 4) + (input[1] >> 4);
        output[2] = ((input[1] & 0x0f) << 2) + (input[2] >> 6);
        output[3] = (input[2] & 0x3f);

        if (dstpos + 4 > dstlen)
            return -1;
        dst[dstpos++] = Base64[output[0]];
        dst[dstpos++] = Base64[output[1]];
        dst[dstpos++] = Base64[output[2]];
        dst[dstpos++] = Base64[output[3]];

        if (strict && ++ocnt == (72/4)) {
            /*
             * The line break is a fifth character for this group and
             * was not covered by the room check above, so it needs
             * its own check to avoid writing past the end of dst.
             */
            if (dstpos >= dstlen)
                return -1;
            dst[dstpos++] = '\n';
            /* count groups per line so the counter cannot overflow */
            ocnt = 0;
        }
    }

    /* now worry about padding with remaining 1 or 2 bytes */
    if (srclen != 0) {
        /* missing input bytes are treated as zero bits */
        input[0] = input[1] = input[2] = 0;
        for (i = 0; i < srclen; i++)
            input[i] = *src++;

        output[0] = (input[0] >> 2);
        output[1] = ((input[0] & 0x03) << 4) + (input[1] >> 4);
        output[2] = ((input[1] & 0x0f) << 2) + (input[2] >> 6);

        if (dstpos + 4 > dstlen)
            return -1;
        dst[dstpos++] = Base64[output[0]];
        dst[dstpos++] = Base64[output[1]];
        /* one input byte -> "xx==", two input bytes -> "xxx=" */
        if (srclen == 1)
            dst[dstpos++] = Pad64;
        else
            dst[dstpos++] = Base64[output[2]];
        dst[dstpos++] = Pad64;
    }

    /* there has to be room left for the terminating NUL */
    if (dstpos >= dstlen)
        return -1;
    dst[dstpos] = '\0';
    return (int)dstpos;
}
/*
 * True if ch is a 7-bit ASCII whitespace character. The range test
 * keeps values outside 0..127 (e.g. negative plain chars) away from
 * isspace().
 */
static int
base64_isspace(int ch)
{
    return (ch >= 0 && ch <= 0x7f && isspace(ch));
}

/*
 * base64_decode - decoding: base64 -> binary
 *
 *   dst     output buffer, or NULL to only count the decoded bytes
 *   dstlen  capacity of dst in bytes (ignored when dst is NULL)
 *   src     Base64 text; whitespace anywhere in it is skipped
 *   srclen  number of characters to read from src; if 0, src is
 *           taken to be NUL-terminated and strlen(src) is used
 *
 * Returns the number of decoded bytes, or -1 if src is NULL, the text
 * contains a character outside the Base64 alphabet (including an
 * embedded NUL), the padding is malformed, the text ends in the middle
 * of a quantum, or dst is too small. When dst is NULL the check for
 * non-zero left-over bits in front of the padding is not performed,
 * because the bits are only tracked while storing output.
 */
static int
base64_decode(
    unsigned char *dst, str_size_t dstlen,
    char const *src, str_size_t srclen)
{
    int dstidx, state, ch;
    int val;
    int got_pad, got_second_pad;
    unsigned char res;
    char const *pos;

    if (src == NULL)
        return -1;
    if (srclen == 0)
        srclen = strlen(src);

    state = 0;
    dstidx = 0;
    res = 0;
    got_pad = 0;

    /*
     * All loops below test srclen before decrementing it. srclen is
     * unsigned, so a post-decrement inside the condition would wrap
     * around once the count reaches zero and make any later loop run
     * far beyond the end of the input.
     */
    while (srclen > 0) {
        srclen--;
        ch = *src++;
        if (base64_isspace(ch))     /* Skip whitespace anywhere */
            continue;
        if (ch == Pad64) {
            got_pad = 1;
            break;
        }
        /*
         * strchr() would match the alphabet's own terminator for a
         * NUL character, so that one has to be rejected explicitly.
         */
        pos = (ch != '\0') ? strchr(Base64, ch) : NULL;
        if (pos == NULL)            /* A non-base64 character */
            return -1;
        val = (int)(pos - Base64);  /* the 6-bit value, 0..63 */

        switch (state) {
            case 0: /* top six bits of byte 1 */
                if (dst != NULL) {
                    if ((str_size_t)dstidx >= dstlen)
                        return -1;
                    dst[dstidx] = (unsigned char)(val << 2);
                }
                state = 1;
                break;
            case 1: /* low two bits of byte 1, keep four for byte 2 */
                if (dst != NULL) {
                    if ((str_size_t)dstidx >= dstlen)
                        return -1;
                    dst[dstidx] |= (unsigned char)(val >> 4);
                    res = (unsigned char)((val & 0x0f) << 4);
                }
                dstidx++;
                state = 2;
                break;
            case 2: /* low four bits of byte 2, keep two for byte 3 */
                if (dst != NULL) {
                    if ((str_size_t)dstidx >= dstlen)
                        return -1;
                    dst[dstidx] = (unsigned char)(res | (val >> 2));
                    res = (unsigned char)((val & 0x03) << 6);
                }
                dstidx++;
                state = 3;
                break;
            case 3: /* low six bits of byte 3 */
                if (dst != NULL) {
                    if ((str_size_t)dstidx >= dstlen)
                        return -1;
                    dst[dstidx] = (unsigned char)(res | val);
                }
                dstidx++;
                state = 0;
                break;
            default:
                break;
        }
    }

    /*
     * We are done decoding Base-64 chars. Let's see if we ended
     * on a byte boundary, and/or with erroneous trailing characters.
     */
    if (got_pad) {                  /* We got a pad char. */
        switch (state) {
            case 0: /* Invalid = in first position */
            case 1: /* Invalid = in second position */
                return -1;
            case 2: /* Valid, means one byte of info */
                /*
                 * Skip any number of spaces and make sure there is
                 * another trailing = sign. Running out of input does
                 * not count as having seen it.
                 */
                got_second_pad = 0;
                while (srclen > 0) {
                    srclen--;
                    ch = *src++;
                    if (base64_isspace(ch))
                        continue;
                    got_second_pad = (ch == Pad64);
                    break;
                }
                if (!got_second_pad)
                    return -1;
                /* FALLTHROUGH */
            case 3: /* Valid, means two bytes of info */
                /*
                 * We know the last char was an =. Is there anything
                 * but whitespace after it?
                 */
                while (srclen > 0) {
                    srclen--;
                    ch = *src++;
                    if (!base64_isspace(ch))
                        return -1;
                }
                /*
                 * Now make sure for cases 2 and 3 that the "extra"
                 * bits that slopped past the last full byte were
                 * zeros. If we don't check them, they become a
                 * subliminal channel.
                 */
                if (dst != NULL && res != 0)
                    return -1;
                break;
            default:
                break;
        }
    }
    else {
        /*
         * We ended by seeing the end of the string. Make sure we
         * have no partial bytes lying around.
         */
        if (state != 0)
            return -1;
    }
    return dstidx;
}
/*
 * The API Function
 *
 * Dispatches on the bits set in mode:
 *   - STR_BASE64_ENCODE: the ulen bytes at ucp are encoded into the
 *     text buffer s of capacity n; STR_BASE64_STRICT additionally
 *     selects the encoder's strict mode.
 *   - STR_BASE64_DECODE (only looked at if the encode bit is not
 *     set): the text at s, with length n, is decoded into the byte
 *     buffer ucp of capacity ulen.
 * The result of the selected routine is passed through unchanged;
 * if neither bit is set, -1 is returned.
 */
int str_base64(char *s, str_size_t n, unsigned char *ucp, str_size_t ulen, int mode)
{
    if (mode & STR_BASE64_ENCODE) {
        int strict = (mode & STR_BASE64_STRICT) ? TRUE : FALSE;

        return base64_encode(s, n, ucp, ulen, strict);
    }
    if (mode & STR_BASE64_DECODE)
        return base64_decode(ucp, ulen, s, n);

    /* no known operation requested */
    return -1;
}