[PATCH 4/4] Support non-BMP characters on UDF

From: Vladimir 'φ-coder/phcoder' Serbinenko
Date: Thu May 31 2012 - 21:10:15 EST


Replace UCS-2 with proper UTF-16.
Signed-off-by: Vladimir Serbinenko <phcoder@xxxxxxxxx>
---
fs/udf/unicode.c | 35 +++++++++++++++++++++++++++--------
1 file changed, 27 insertions(+), 8 deletions(-)

diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index 7df644d..0d1c93c 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -106,9 +106,20 @@ int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
utf_o->u_len = 0;
for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN - 3));) {
/* Expand OSTA compressed Unicode to Unicode */
- uint32_t c = ocu[i++];
- if (cmp_id == 16)
- c = (c << 8) | ocu[i++];
+ unicode_t c;
+ if (cmp_id == 8)
+ c = ocu[i++];
+ else {
+ int s = utf16s_to_unicode((u16 *) (ocu + i),
+ (ocu_len - i) / 2,
+ UTF16_BIG_ENDIAN_UNALIGNED,
+ &c);
+ if (s <= 0) {
+ c = (ocu[i] << 8) | ocu[i+1];
+ s = 1;
+ }
+ i += 2 * s;
+ }

len = nls->uni2char(c, &utf_o->u_name[utf_o->u_len],
UDF_NAME_LEN - utf_o->u_len);
@@ -142,20 +153,28 @@ try_again:
if (!len)
continue;
/* Invalid character, deal with it */
- if (len < 0 || uni_char > 0xffff) {
+ if (len < 0 || uni_char > 0x10ffff) {
len = 1;
uni_char = '?';
}

if (uni_char > max_val) {
- max_val = 0xffffU;
+ max_val = 0x10ffffU;
ocu[0] = (uint8_t)0x10U;
goto try_again;
}

- if (max_val == 0xffffU)
- ocu[++u_len] = (uint8_t)(uni_char >> 8);
- ocu[++u_len] = (uint8_t)(uni_char & 0xffU);
+ if (max_val == 0x10ffffU) {
+ int s;
+ s = unicode_to_utf16s(uni_char,
+ UTF16_BIG_ENDIAN_UNALIGNED,
+ (u16 *) (ocu + u_len + 1),
+ (length - (u_len + 1)) / 2);
+ if (s <= 0)
+ break;
+ u_len += 2 * s;
+ } else
+ ocu[++u_len] = (uint8_t)(uni_char & 0xffU);
i += len - 1;
}

--
1.7.10

--
Regards
Vladimir 'φ-coder/phcoder' Serbinenko

Attachment: signature.asc
Description: OpenPGP digital signature