NLS: improve UTF8 -> UTF16 string conversion routine
Alan Stern [Thu, 17 Nov 2011 21:42:19 +0000 (16:42 -0500)]
The utf8s_to_utf16s conversion routine needs to be improved.  Unlike
its utf16s_to_utf8s sibling, it doesn't accept arguments specifying
the maximum length of the output buffer or the endianness of its
16-bit output.

This patch (as1501) adds the two missing arguments, and adjusts the
only two places in the kernel where the function is called.  A
follow-on patch will add a third caller that does utilize the new
capabilities.

The two conversion routines are still annoyingly inconsistent in the
way they handle invalid byte combinations.  But that's a subject for a
different patch.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
CC: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

drivers/hv/hv_kvp.c
fs/fat/namei_vfat.c
fs/nls/nls_base.c
include/linux/nls.h

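diff --git a/drivers/hv/hv_kvp.c b/drivers/hv/hv_kvp.c
index 89f5244..0e8343f 100644
--- a/drivers/hv/hv_kvp.c
+++ b/drivers/hv/hv_kvp.c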
@@ -212,11 +212,13 @@ kvp_respond_to_host(char *key, char *value, int error)
         * The windows host expects the key/value pair to be encoded
         * in utf16.
         */
-       keylen = utf8s_to_utf16s(key_name, strlen(key_name),
-                               (wchar_t *)kvp_data->data.key);
+       keylen = utf8s_to_utf16s(key_name, strlen(key_name), UTF16_HOST_ENDIAN,
+                               (wchar_t *) kvp_data->data.key,
+                               HV_KVP_EXCHANGE_MAX_KEY_SIZE / 2);
        kvp_data->data.key_size = 2*(keylen + 1); /* utf16 encoding */
-       valuelen = utf8s_to_utf16s(value, strlen(value),
-                               (wchar_t *)kvp_data->data.value);
+       valuelen = utf8s_to_utf16s(value, strlen(value), UTF16_HOST_ENDIAN,
+                               (wchar_t *) kvp_data->data.value,
+                               HV_KVP_EXCHANGE_MAX_VALUE_SIZE / 2);
        kvp_data->data.value_size = 2*(valuelen + 1); /* utf16 encoding */
 
        kvp_data->data.value_type = REG_SZ; /* all our values are strings */
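diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index a87a656..c25cf15 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c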
@@ -512,7 +512,8 @@ xlate_to_uni(const unsigned char *name, int len, unsigned char *outname,
        int charlen;
 
        if (utf8) {
-               *outlen = utf8s_to_utf16s(name, len, (wchar_t *)outname);
+               *outlen = utf8s_to_utf16s(name, len, UTF16_HOST_ENDIAN,
+                               (wchar_t *) outname, FAT_LFN_LEN + 2);
                if (*outlen < 0)
                        return *outlen;
                else if (*outlen > FAT_LFN_LEN)
diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c
index 44a88a9..0eb059e 100644
--- a/fs/nls/nls_base.c
+++ b/fs/nls/nls_base.c
@@ -114,34 +114,57 @@ int utf32_to_utf8(unicode_t u, u8 *s, int maxlen)
 }
 EXPORT_SYMBOL(utf32_to_utf8);
 
-int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs)
+static inline void put_utf16(wchar_t *s, unsigned c, enum utf16_endian endian)
+{
+       switch (endian) {
+       default:
+               *s = (wchar_t) c;
+               break;
+       case UTF16_LITTLE_ENDIAN:
+               *s = __cpu_to_le16(c);
+               break;
+       case UTF16_BIG_ENDIAN:
+               *s = __cpu_to_be16(c);
+               break;
+       }
+}
+
+int utf8s_to_utf16s(const u8 *s, int len, enum utf16_endian endian,
+               wchar_t *pwcs, int maxlen)
 {
        u16 *op;
        int size;
        unicode_t u;
 
        op = pwcs;
-       while (*s && len > 0) {
+       while (len > 0 && maxlen > 0 && *s) {
                if (*s & 0x80) {
                        size = utf8_to_utf32(s, len, &u);
                        if (size < 0)
                                return -EINVAL;
+                       s += size;
+                       len -= size;
 
                        if (u >= PLANE_SIZE) {
+                               if (maxlen < 2)
+                                       break;
                                u -= PLANE_SIZE;
-                               *op++ = (wchar_t) (SURROGATE_PAIR |
-                                               ((u >> 10) & SURROGATE_BITS));
-                               *op++ = (wchar_t) (SURROGATE_PAIR |
+                               put_utf16(op++, SURROGATE_PAIR |
+                                               ((u >> 10) & SURROGATE_BITS),
+                                               endian);
+                               put_utf16(op++, SURROGATE_PAIR |
                                                SURROGATE_LOW |
-                                               (u & SURROGATE_BITS));
+                                               (u & SURROGATE_BITS),
+                                               endian);
+                               maxlen -= 2;
                        } else {
-                               *op++ = (wchar_t) u;
+                               put_utf16(op++, u, endian);
+                               maxlen--;
                        }
-                       s += size;
-                       len -= size;
                } else {
-                       *op++ = *s++;
+                       put_utf16(op++, *s++, endian);
                        len--;
+                       maxlen--;
                }
        }
        return op - pwcs;
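diff --git a/include/linux/nls.h b/include/linux/nls.h
index d47beef..5dc635f 100644
--- a/include/linux/nls.h
+++ b/include/linux/nls.h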
@@ -43,7 +43,7 @@ enum utf16_endian {
        UTF16_BIG_ENDIAN
 };
 
-/* nls.c */
+/* nls_base.c */
 extern int register_nls(struct nls_table *);
 extern int unregister_nls(struct nls_table *);
 extern struct nls_table *load_nls(char *);
@@ -52,7 +52,8 @@ extern struct nls_table *load_nls_default(void);
 
 extern int utf8_to_utf32(const u8 *s, int len, unicode_t *pu);
 extern int utf32_to_utf8(unicode_t u, u8 *s, int maxlen);
-extern int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs);
+extern int utf8s_to_utf16s(const u8 *s, int len,
+               enum utf16_endian endian, wchar_t *pwcs, int maxlen);
 extern int utf16s_to_utf8s(const wchar_t *pwcs, int len,
                enum utf16_endian endian, u8 *s, int maxlen);