[PATCH v2 12/13] vt: update gen_ucs_width_table.py to make tables more space efficient
From: Nicolas Pitre
Date: Tue Apr 15 2025 - 15:24:03 EST
From: Nicolas Pitre <npitre@xxxxxxxxxxxx>
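Split the generated table ranges into BMP (code points up to U+FFFF, which fit in 16 bits) and non-BMP (code points above U+FFFF) tables, emitted as struct ucs_interval16 and struct ucs_interval32 arrays respectively.
This reduces the corresponding text size by 20-25%.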
Signed-off-by: Nicolas Pitre <npitre@xxxxxxxxxxxx>
---
drivers/tty/vt/gen_ucs_width_table.py | 55 ++++++++++++++++++++++++---
1 file changed, 49 insertions(+), 6 deletions(-)
diff --git a/drivers/tty/vt/gen_ucs_width_table.py b/drivers/tty/vt/gen_ucs_width_table.py
index 00510444a7..059ed9a8ba 100755
--- a/drivers/tty/vt/gen_ucs_width_table.py
+++ b/drivers/tty/vt/gen_ucs_width_table.py
@@ -194,6 +194,27 @@ def write_tables(zero_width_ranges, double_width_ranges):
double_width_ranges: List of (start, end) ranges for double-width characters
"""
+ # Helper to split (start, end) ranges into BMP (end <= 0xFFFF) and non-BMP (start >= 0x10000) lists
+ def split_ranges_by_size(ranges):
+ bmp_ranges = []
+ non_bmp_ranges = []
+
+ for start, end in ranges:
+ if end <= 0xFFFF:
+ bmp_ranges.append((start, end))
+ elif start > 0xFFFF:
+ non_bmp_ranges.append((start, end))
+ else:
+ # Range straddles the BMP boundary: split it into a part ending at 0xFFFF and a part starting at 0x10000
+ bmp_ranges.append((start, 0xFFFF))
+ non_bmp_ranges.append((0x10000, end))
+
+ return bmp_ranges, non_bmp_ranges
+
+ # Split ranges into BMP and non-BMP
+ zero_width_bmp, zero_width_non_bmp = split_ranges_by_size(zero_width_ranges)
+ double_width_bmp, double_width_non_bmp = split_ranges_by_size(double_width_ranges)
+
# Function to generate code point description comments
def get_code_point_comment(start, end):
try:
@@ -221,22 +242,44 @@ def write_tables(zero_width_ranges, double_width_ranges):
* Unicode Version: {unicodedata.unidata_version}
*/
-/* Zero-width character ranges */
-static const struct ucs_interval ucs_zero_width_ranges[] = {{
+/* Zero-width character ranges (BMP - Basic Multilingual Plane, U+0000 to U+FFFF) */
+static const struct ucs_interval16 ucs_zero_width_bmp_ranges[] = {{
+""")
+
+ for start, end in zero_width_bmp:
+ comment = get_code_point_comment(start, end)
+ f.write(f"\t{{ 0x{start:04X}, 0x{end:04X} }}, {comment}\n")
+
+ f.write("""\
+};
+
+/* Zero-width character ranges (non-BMP, U+10000 and above) */
+static const struct ucs_interval32 ucs_zero_width_non_bmp_ranges[] = {
""")
- for start, end in zero_width_ranges:
+ for start, end in zero_width_non_bmp:
comment = get_code_point_comment(start, end)
f.write(f"\t{{ 0x{start:05X}, 0x{end:05X} }}, {comment}\n")
f.write("""\
};
-/* Double-width character ranges */
-static const struct ucs_interval ucs_double_width_ranges[] = {
+/* Double-width character ranges (BMP - Basic Multilingual Plane, U+0000 to U+FFFF) */
+static const struct ucs_interval16 ucs_double_width_bmp_ranges[] = {
+""")
+
+ for start, end in double_width_bmp:
+ comment = get_code_point_comment(start, end)
+ f.write(f"\t{{ 0x{start:04X}, 0x{end:04X} }}, {comment}\n")
+
+ f.write("""\
+};
+
+/* Double-width character ranges (non-BMP, U+10000 and above) */
+static const struct ucs_interval32 ucs_double_width_non_bmp_ranges[] = {
""")
- for start, end in double_width_ranges:
+ for start, end in double_width_non_bmp:
comment = get_code_point_comment(start, end)
f.write(f"\t{{ 0x{start:05X}, 0x{end:05X} }}, {comment}\n")
--
2.49.0