OSSP CVS Repository

ossp - Check-in [4297]
Not logged in
[Honeypot]  [Browse]  [Home]  [Login]  [Reports]
[Search]  [Ticket]  [Timeline]
  [Patchset]  [Tagging/Branching]

Check-in Number: 4297
Date: 2001-Aug-08 17:10:02 (local)
2001-Aug-08 15:10:02 (UTC)
User:simons
Branch:
Comment: - Implemented UTF-8 encoding of strings. - Fixed a buffer-overrun problem in xml_encode_string().
Tickets:
Inspections:
Files:
ossp-pkg/xds/xml-decode-string.c      1.1->removed
ossp-pkg/xds/xml-encode-string.c      1.1->removed

ossp-pkg/xds/xml-decode-string.c 1.1 -> 1.2

--- xml-decode-string.c  2001/08/08 11:21:27     1.1
+++ xml-decode-string.c  2001/08/08 15:10:02     1.2
@@ -25,9 +25,71 @@
    SUCH DAMAGE.
 */
 
+#include <stdio.h>
 #include <string.h>
 #include "xds.h"
 
+#define INVALID 0x80000000      /* sgetu8() result for a malformed sequence */
+/* get(c): fetch the next byte into c, count it, return EOF from the caller on NUL. */
+#define get(c)  c = *strptr++; /* uses the caller's strptr and chars */ \
+        if (chars) (*chars)++; /* chars may be NULL: then nothing is counted */ \
+        if ((c) == 0) return (unsigned int)EOF
+
+/*
+ * Decode one UTF-8 sequence (RFC 2279 layout, 1 to 6 bytes) from strptr.
+ *
+ * strptr: NUL-terminated input; NULL yields (unsigned int)EOF.
+ * chars:  optional; when non-NULL it receives the number of bytes read,
+ *         including a byte that made decoding fail.
+ *
+ * Returns the decoded value, (unsigned int)EOF if a NUL byte is reached
+ * before the sequence is complete, or INVALID for a stray continuation
+ * byte, a 0xFE/0xFF lead byte, a missing continuation byte, or an
+ * overlong (non-shortest) encoding.
+ */
+static unsigned int sgetu8(unsigned char *strptr, int *chars)
+    {
+    unsigned int c;
+    unsigned int lowest;        /* smallest value needing this length */
+    int i, iterations;
+    unsigned char ch;
+
+    if (chars)
+        *chars = 0;
+
+    if (strptr == NULL)
+        return (unsigned int)EOF;
+
+    get(c);
+
+    /* Lead byte: keep its payload bits, note how many bytes follow. */
+    if ((c & 0x80) == 0x00)
+        return c;
+    else if ((c & 0xE0) == 0xC0)
+        { c &= 0x1F; iterations = 1; lowest = 0x80; }
+    else if ((c & 0xF0) == 0xE0)
+        { c &= 0x0F; iterations = 2; lowest = 0x800; }
+    else if ((c & 0xF8) == 0xF0)
+        { c &= 0x07; iterations = 3; lowest = 0x10000; }
+    else if ((c & 0xFC) == 0xF8)
+        { c &= 0x03; iterations = 4; lowest = 0x200000; }
+    else if ((c & 0xFE) == 0xFC)
+        { c &= 0x01; iterations = 5; lowest = 0x4000000; }
+    else
+        return INVALID;         /* 10xxxxxx, 0xFE or 0xFF */
+
+    for (i = 0; i < iterations; i++)
+        {
+        get(ch);
+        if ((ch & 0xC0) != 0x80)
+            return INVALID;
+        c = (c << 6) | (ch & 0x3F);
+        }
+
+    /* A value that fits a shorter form was encoded overlong: reject. */
+    return (c < lowest) ? INVALID : c;
+    }
+
 static const char TAG_OPEN[] = "<string>";
 static const char TAG_CLOSE[] = "</string>";
 static const size_t TAG_OPEN_LEN = sizeof(TAG_OPEN)-1;
@@ -41,6 +103,8 @@
     char*   src;
     size_t  src_len;
     char*   dst;
+    int     utf8_len;
+    unsigned int rc;
 
     /* Setup the engine. We need at least space for our tags; how long
        the actual content is going to be will be seen soon. */
@@ -100,6 +164,16 @@
                 return XDS_ERR_TYPE_MISMATCH;
                 }
             }
+        else if (*((xds_uint8_t*)src) >= 0x80)
+            {
+            rc = sgetu8((xds_uint8_t*)src, &utf8_len);
+            if (rc == (unsigned int)EOF)
+                return XDS_ERR_UNDERFLOW;
+            else if (rc == INVALID || rc > 255)
+                return XDS_ERR_TYPE_MISMATCH;
+            *dst++ = (xds_uint8_t)rc;
+            src += utf8_len; src_len -= utf8_len;
+            }
         else
             {
             *dst++ = *src++;


ossp-pkg/xds/xml-encode-string.c 1.1 -> 1.2

--- xml-encode-string.c  2001/08/08 11:21:27     1.1
+++ xml-encode-string.c  2001/08/08 15:10:02     1.2
@@ -25,9 +25,71 @@
    SUCH DAMAGE.
 */
 
+#include <stdio.h>
 #include <string.h>
 #include "xds.h"
 
+#define bits(c) (0x80 | ((c) & 0x3F))  /* low six bits of c as a 10xxxxxx byte */
+#define put(c)  *strptr++ = (c);       /* store at strptr and advance; carries its own ';' */
+#define putbits(c)      put(bits(c))   /* store the low six bits of c as a 10xxxxxx byte */
+#define finish()        *strptr = '\0' /* NUL-terminate without advancing strptr */
+
+/*
+ * Encode the code point c as UTF-8 at strptr and NUL-terminate the result.
+ * The RFC 2279 layout is used, so a sequence is 1 to 6 bytes long:
+ *
+ *     c < 0x80         0xxxxxxx
+ *     c < 0x800        110xxxxx 10xxxxxx
+ *     c < 0x10000      1110xxxx 10xxxxxx 10xxxxxx
+ *     c < 0x200000     11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+ *     c < 0x4000000    111110xx followed by four 10xxxxxx bytes
+ *     c < 0x80000000   1111110x followed by five 10xxxxxx bytes
+ *
+ * c:      value to encode. Values of 0x80000000 and above cannot be
+ *         represented; for those only the terminating NUL is written.
+ * strptr: destination buffer. The longest form writes 6 bytes plus the
+ *         NUL, so 7 bytes must be available. NULL is returned unchanged.
+ *
+ * Returns a pointer to the terminating NUL, i.e. one past the last encoded
+ * byte, so the caller can tell how many bytes were produced.
+ */
+static char* sputu8(xds_uint32_t c, char* strptr)
+    {
+    int          tail;          /* number of 10xxxxxx bytes after the lead */
+    xds_uint32_t lead;          /* length marker bits of the first byte */
+
+    if (strptr == NULL)
+        return NULL;
+
+    if (c < 0x80)
+        { tail = 0; lead = 0x00; }
+    else if (c < 0x800)
+        { tail = 1; lead = 0xC0; }
+    else if (c < 0x10000)
+        { tail = 2; lead = 0xE0; }
+    else if (c < 0x200000)
+        { tail = 3; lead = 0xF0; }
+    else if (c < 0x4000000)     /* five bytes carry 26 bits, not 22 */
+        { tail = 4; lead = 0xF8; }
+    else if (c < 0x80000000)
+        { tail = 5; lead = 0xFC; }
+    else
+        {                       /* Not a valid Unicode "character" */
+        finish();
+        return strptr;
+        }
+
+    /* Lead byte: marker bits plus the most significant payload bits. */
+    put(lead | (c >> (6 * tail)));
+    /* Continuation bytes, six payload bits each, high bits first. */
+    while (tail-- > 0)
+        {
+        putbits(c >> (6 * tail));
+        }
+    finish();
+    return strptr;
+    }
+
 static const char TAG_OPEN[] = "<string>";
 static const char TAG_CLOSE[] = "</string>";
 static const size_t TAG_OPEN_LEN = sizeof(TAG_OPEN)-1;
@@ -41,6 +103,7 @@
     size_t src_len;
     char*  dst;
     size_t dst_size;
+    char*  tmp;
 
     /* Setup the engine. We need at least space for our tags; how long
        the actual content is going to be will be seen soon. */
@@ -68,48 +131,67 @@
 
     while(src_len > 0 && dst_size > 0)
         {
-        switch(*src)
-            {
-            case '<':           /* Turn into "&lt;". */
-                if (dst_size >= 4)
-                    {
-                    *dst++ = '&'; --dst_size;
-                    *dst++ = 'l'; --dst_size;
-                    *dst++ = 't'; --dst_size;
-                    *dst++ = ';'; --dst_size;
-                    ++src; --src_len;
-                    }
-                break;
-            case '&':           /* Turn into "&amp;". */
-                if (dst_size >= 5)
-                    {
-                    *dst++ = '&'; --dst_size;
-                    *dst++ = 'a'; --dst_size;
-                    *dst++ = 'm'; --dst_size;
-                    *dst++ = 'p'; --dst_size;
-                    *dst++ = ';'; --dst_size;
-                    ++src; --src_len;
-                    }
-                break;
-            case '>':           /* Turn into "&gt;". */
-                if (dst_size >= 4)
-                    {
-                    *dst++ = '&'; --dst_size;
-                    *dst++ = 'g'; --dst_size;
-                    *dst++ = 't'; --dst_size;
-                    *dst++ = ';'; --dst_size;
-                    ++src; --src_len;
-                    }
-                break;
-            default:            /* Just copy it. */
-                *dst++ = *src++;
-                --src_len;
-                --dst_size;
+        if (*((xds_uint8_t*)src) >= 0x80)
+            {                   /* UTF-8ify it. */
+            if (dst_size >= 7)
+                {
+                tmp = sputu8((xds_uint32_t)*((xds_uint8_t*)src), dst);
+                ++src; --src_len;
+                dst_size -= tmp - dst;
+                dst = tmp;
+                }
+            else
+                dst_size = 0;
+            }
+        else if (*src == '<')
+            {                   /* Turn into "&lt;". */
+            if (dst_size >= 4)
+                {
+                *dst++ = '&'; --dst_size;
+                *dst++ = 'l'; --dst_size;
+                *dst++ = 't'; --dst_size;
+                *dst++ = ';'; --dst_size;
+                ++src; --src_len;
+                }
+            else
+                dst_size = 0;
+            }
+        else if (*src == '&')
+            {                   /* Turn into "&amp;". */
+            if (dst_size >= 5)
+                {
+                *dst++ = '&'; --dst_size;
+                *dst++ = 'a'; --dst_size;
+                *dst++ = 'm'; --dst_size;
+                *dst++ = 'p'; --dst_size;
+                *dst++ = ';'; --dst_size;
+                ++src; --src_len;
+                }
+            else
+                dst_size = 0;
+            }
+        else if (*src == '>')
+            {                   /* Turn into "&gt;". */
+            if (dst_size >= 4)
+                {
+                *dst++ = '&'; --dst_size;
+                *dst++ = 'g'; --dst_size;
+                *dst++ = 't'; --dst_size;
+                *dst++ = ';'; --dst_size;
+                ++src; --src_len;
+                }
+            else
+                dst_size = 0;
+            }
+        else
+            {                   /* No special character; just copy it. */
+            *dst++ = *src++;
+            --src_len; --dst_size;
             }
         }
     if (src_len > 0)
         {                       /* Target buffer was too small. */
-        *used_buffer_size = dst - (char*)buffer + 1;
+        *used_buffer_size = buffer_size + 1;
         return XDS_ERR_OVERFLOW;
         }
 

CVSTrac 2.0.1