Check-in Number:
|
4297 | |
Date: |
2001-Aug-08 17:10:02 (local)
2001-Aug-08 15:10:02 (UTC) |
User: | simons |
Branch: | |
Comment: |
- Implemented UTF-8 encoding of strings.
- Fixed a buffer-overrun problem in xml_encode_string(). |
Tickets: |
|
Inspections: |
|
Files: |
|
ossp-pkg/xds/xml-decode-string.c 1.1 -> 1.2
--- xml-decode-string.c 2001/08/08 11:21:27 1.1
+++ xml-decode-string.c 2001/08/08 15:10:02 1.2
@@ -25,9 +25,71 @@
SUCH DAMAGE.
*/
+#include <stdio.h>
#include <string.h>
#include "xds.h"
+#define INVALID 0x80000000 /* returned by sgetu8() for a malformed sequence; bit 31 is never set in a decoded value (at most 1 + 5*6 = 31 payload bits) */
+/* get(c): fetch the next byte from the caller's local `strptr` into c and
+ * advance the pointer; bump *chars when the caller's `chars` is non-NULL;
+ * make the ENCLOSING function return (unsigned int)EOF if the byte is zero.
+ * It expands to several statements without a do/while wrapper, so it is
+ * only safe as a complete statement, never as the body of an unbraced if. */
+#define get(c) c = *strptr++; \
+ if (chars) (*chars)++; \
+ if ((c) == 0) return (unsigned int)EOF
+
+/*
+ * Decode a single UTF-8 sequence (one to six bytes) starting at strptr.
+ *
+ * strptr  first byte of the sequence; may be NULL.
+ * chars   if non-NULL, receives the number of bytes read, including the
+ *         byte at which decoding stopped on error.
+ *
+ * Returns the decoded value on success, (unsigned int)EOF if strptr is NULL
+ * or a zero byte is read, and INVALID if the lead byte is not a valid lead
+ * byte, a continuation byte is not of the form 10xxxxxx, or the sequence is
+ * overlong (encodes a value that fits into a shorter sequence).
+ */
+static unsigned int sgetu8(unsigned char *strptr, int *chars)
+    {
+    unsigned int c;
+    unsigned int min;   /* smallest value that really needs this length */
+    int i, iterations;
+    unsigned char ch;
+
+    if (chars)
+        *chars = 0;
+
+    if (strptr == NULL)
+        return (unsigned int)EOF;
+
+    get(c);
+
+    /* Classify the lead byte: keep its payload bits, remember how many
+       continuation bytes follow and the shortest-form lower bound. */
+    if ((c & 0xFE) == 0xFC)
+        {
+        c &= 0x01;
+        iterations = 5;
+        min = 0x4000000;
+        }
+    else if ((c & 0xFC) == 0xF8)
+        {
+        c &= 0x03;
+        iterations = 4;
+        min = 0x200000;
+        }
+    else if ((c & 0xF8) == 0xF0)
+        {
+        c &= 0x07;
+        iterations = 3;
+        min = 0x10000;
+        }
+    else if ((c & 0xF0) == 0xE0)
+        {
+        c &= 0x0F;
+        iterations = 2;
+        min = 0x800;
+        }
+    else if ((c & 0xE0) == 0xC0)
+        {
+        c &= 0x1F;
+        iterations = 1;
+        min = 0x80;
+        }
+    else if ((c & 0x80) == 0x80)
+        return INVALID;     /* stray continuation byte, or 0xFE / 0xFF */
+    else
+        return c;           /* plain 7-bit character */
+
+    for (i = 0; i < iterations; i++)
+        {
+        get(ch);
+        if ((ch & 0xC0) != 0x80)
+            return INVALID;
+        c <<= 6;
+        c |= ch & 0x3F;
+        }
+
+    /* Reject overlong forms: otherwise e.g. 0xC0 0xBC would decode to '<'
+       and slip a markup character past the caller's escaping checks. */
+    if (c < min)
+        return INVALID;
+
+    return c;
+    }
+
static const char TAG_OPEN[] = "<string>";
static const char TAG_CLOSE[] = "</string>";
static const size_t TAG_OPEN_LEN = sizeof(TAG_OPEN)-1;
@@ -41,6 +103,8 @@
char* src;
size_t src_len;
char* dst;
+ int utf8_len;
+ unsigned int rc;
/* Setup the engine. We need at least space for our tags; how long
the actual content is going to be will be seen soon. */
@@ -100,6 +164,16 @@
return XDS_ERR_TYPE_MISMATCH;
}
}
+ else if (*((xds_uint8_t*)src) >= 0x80)
+ {
+ rc = sgetu8((xds_uint8_t*)src, &utf8_len);
+ if (rc == (unsigned int)EOF)
+ return XDS_ERR_UNDERFLOW;
+ else if (rc == INVALID || rc > 255)
+ return XDS_ERR_TYPE_MISMATCH;
+ *dst++ = (xds_uint8_t)rc;
+ src += utf8_len; src_len -= utf8_len;
+ }
else
{
*dst++ = *src++;
|
|
ossp-pkg/xds/xml-encode-string.c 1.1 -> 1.2
--- xml-encode-string.c 2001/08/08 11:21:27 1.1
+++ xml-encode-string.c 2001/08/08 15:10:02 1.2
@@ -25,9 +25,71 @@
SUCH DAMAGE.
*/
+#include <stdio.h>
#include <string.h>
#include "xds.h"
+#define bits(c) (0x80 | ((c) & 0x3F)) /* continuation byte 10xxxxxx built from the low six bits of c */
+#define put(c) *strptr++ = (c); /* store one byte at the caller's `strptr` and advance it; NOTE(review): the expansion already ends in ';', so `put(x);` yields an empty extra statement */
+#define putbits(c) put(bits(c)) /* store the low six bits of c as a continuation byte */
+#define finish() *strptr = '\0' /* NUL-terminate at `strptr` without advancing it */
+
+/*
+ * Encode the value c as UTF-8 at strptr and NUL-terminate the result.
+ *
+ * c       value to encode; anything below 0x80000000 is accepted.
+ * strptr  destination buffer; may be NULL.  Up to six encoded bytes plus
+ *         the terminating '\0' are written, so seven bytes must be
+ *         available.
+ *
+ * Returns NULL if strptr is NULL; otherwise a pointer to the terminating
+ * '\0', i.e. one past the last encoded byte.  For c >= 0x80000000 nothing
+ * but the '\0' is stored and strptr is returned unchanged.
+ */
+static char* sputu8(xds_uint32_t c, char* strptr)
+    {
+    if (strptr == NULL)
+        return NULL;
+
+    if (c < 0x80)
+        {                           /* 7 bits:  0xxxxxxx */
+        put(c);
+        finish();
+        }
+    else if (c < 0x800)
+        {                           /* 11 bits: 110xxxxx + 1 */
+        put(0xC0 | (c >> 6));
+        putbits(c);
+        finish();
+        }
+    else if (c < 0x10000)
+        {                           /* 16 bits: 1110xxxx + 2 */
+        put(0xE0 | (c >> 12));
+        putbits(c >> 6);
+        putbits(c);
+        finish();
+        }
+    else if (c < 0x200000)
+        {                           /* 21 bits: 11110xxx + 3 */
+        put(0xF0 | (c >> 18));
+        putbits(c >> 12);
+        putbits(c >> 6);
+        putbits(c);
+        finish();
+        }
+    else if (c < 0x4000000)
+        {                           /* 26 bits: 111110xx + 4 */
+        /* The bound is 2^26 (2 lead bits + 4 * 6 continuation bits); a
+           smaller bound would push values that fit here into the longer,
+           non-shortest six-byte form. */
+        put(0xF8 | (c >> 24));
+        putbits(c >> 18);
+        putbits(c >> 12);
+        putbits(c >> 6);
+        putbits(c);
+        finish();
+        }
+    else if (c < 0x80000000)
+        {                           /* 31 bits: 1111110x + 5 */
+        put(0xFC | (c >> 30));
+        putbits(c >> 24);
+        putbits(c >> 18);
+        putbits(c >> 12);
+        putbits(c >> 6);
+        putbits(c);
+        finish();
+        }
+    else
+        finish();                   /* Not a valid Unicode "character" */
+
+    return strptr;
+    }
+
static const char TAG_OPEN[] = "<string>";
static const char TAG_CLOSE[] = "</string>";
static const size_t TAG_OPEN_LEN = sizeof(TAG_OPEN)-1;
@@ -41,6 +103,7 @@
size_t src_len;
char* dst;
size_t dst_size;
+ char* tmp;
/* Setup the engine. We need at least space for our tags; how long
the actual content is going to be will be seen soon. */
@@ -68,48 +131,67 @@
while(src_len > 0 && dst_size > 0)
{
- switch(*src)
- {
- case '<': /* Turn into "&lt;". */
- if (dst_size >= 4)
- {
- *dst++ = '&'; --dst_size;
- *dst++ = 'l'; --dst_size;
- *dst++ = 't'; --dst_size;
- *dst++ = ';'; --dst_size;
- ++src; --src_len;
- }
- break;
- case '&': /* Turn into "&amp;". */
- if (dst_size >= 5)
- {
- *dst++ = '&'; --dst_size;
- *dst++ = 'a'; --dst_size;
- *dst++ = 'm'; --dst_size;
- *dst++ = 'p'; --dst_size;
- *dst++ = ';'; --dst_size;
- ++src; --src_len;
- }
- break;
- case '>': /* Turn into "&gt;". */
- if (dst_size >= 4)
- {
- *dst++ = '&'; --dst_size;
- *dst++ = 'g'; --dst_size;
- *dst++ = 't'; --dst_size;
- *dst++ = ';'; --dst_size;
- ++src; --src_len;
- }
- break;
- default: /* Just copy it. */
- *dst++ = *src++;
- --src_len;
- --dst_size;
+ if (*((xds_uint8_t*)src) >= 0x80)
+ { /* UTF-8ify it. */
+ if (dst_size >= 7)
+ {
+ tmp = sputu8((xds_uint32_t)*((xds_uint8_t*)src), dst);
+ ++src; --src_len;
+ dst_size -= tmp - dst;
+ dst = tmp;
+ }
+ else
+ dst_size = 0;
+ }
+ else if (*src == '<')
+ { /* Turn into "&lt;". */
+ if (dst_size >= 4)
+ {
+ *dst++ = '&'; --dst_size;
+ *dst++ = 'l'; --dst_size;
+ *dst++ = 't'; --dst_size;
+ *dst++ = ';'; --dst_size;
+ ++src; --src_len;
+ }
+ else
+ dst_size = 0;
+ }
+ else if (*src == '&')
+ { /* Turn into "&amp;". */
+ if (dst_size >= 5)
+ {
+ *dst++ = '&'; --dst_size;
+ *dst++ = 'a'; --dst_size;
+ *dst++ = 'm'; --dst_size;
+ *dst++ = 'p'; --dst_size;
+ *dst++ = ';'; --dst_size;
+ ++src; --src_len;
+ }
+ else
+ dst_size = 0;
+ }
+ else if (*src == '>')
+ { /* Turn into "&gt;". */
+ if (dst_size >= 4)
+ {
+ *dst++ = '&'; --dst_size;
+ *dst++ = 'g'; --dst_size;
+ *dst++ = 't'; --dst_size;
+ *dst++ = ';'; --dst_size;
+ ++src; --src_len;
+ }
+ else
+ dst_size = 0;
+ }
+ else
+ { /* No special character; just copy it. */
+ *dst++ = *src++;
+ --src_len; --dst_size;
}
}
if (src_len > 0)
{ /* Target buffer was too small. */
- *used_buffer_size = dst - (char*)buffer + 1;
+ *used_buffer_size = buffer_size + 1;
return XDS_ERR_OVERFLOW;
}
|
|