Re: [PATCH 03/11] vt: properly support zero-width Unicode code points

From: Jiri Slaby
Date: Mon Apr 14 2025 - 02:51:36 EST


On 10. 04. 25, 3:13, Nicolas Pitre wrote:
From: Nicolas Pitre <npitre@xxxxxxxxxxxx>

Zero-width Unicode code points are causing misalignment in vertically
aligned content, disrupting the visual layout. Let's handle zero-width
code points more intelligently.
...
--- a/drivers/tty/vt/vt.c
+++ b/drivers/tty/vt/vt.c
@@ -443,6 +443,15 @@ static void vc_uniscr_scroll(struct vc_data *vc, unsigned int top,
}
}
+static u32 vc_uniscr_getc(struct vc_data *vc, int relative_pos)
+{
+ int pos = vc->state.x + vc->vc_need_wrap + relative_pos;
+
+ if (vc->vc_uni_lines && pos >= 0 && pos < vc->vc_cols)

So that is:
in_range(pos, 0, vc->vc_cols)
right?

+ return vc->vc_uni_lines[vc->state.y][pos];
+ return 0;
+}
+
static void vc_uniscr_copy_area(u32 **dst_lines,
unsigned int dst_cols,
unsigned int dst_rows,
@@ -2905,18 +2914,49 @@ static bool vc_is_control(struct vc_data *vc, int tc, int c)
return false;
}
+static void vc_con_rewind(struct vc_data *vc)
+{
+ if (vc->state.x && !vc->vc_need_wrap) {
+ vc->vc_pos -= 2;
+ vc->state.x--;
+ }
+ vc->vc_need_wrap = 0;
+}
+
static int vc_con_write_normal(struct vc_data *vc, int tc, int c,
struct vc_draw_region *draw)
{
- int next_c;
+ int next_c, prev_c;
unsigned char vc_attr = vc->vc_attr;
u16 himask = vc->vc_hi_font_mask, charmask = himask ? 0x1ff : 0xff;
u8 width = 1;
bool inverse = false;
if (vc->vc_utf && !vc->vc_disp_ctrl) {
- if (ucs_is_double_width(c))
+ if (ucs_is_double_width(c)) {
width = 2;
+ } else if (ucs_is_zero_width(c)) {
+ prev_c = vc_uniscr_getc(vc, -1);
+ if (prev_c == ' ' &&
+ ucs_is_double_width(vc_uniscr_getc(vc, -2))) {
+ /*
+ * Let's merge this zero-width code point with
+ * the preceding double-width code point by
+ * replacing the existing whitespace padding.
+ */
+ vc_con_rewind(vc);
+ } else if (c == 0xfe0f && prev_c != 0) {
+ /*
+ * VS16 (U+FE0F) is special. Let it have a
+ * width of 1 when preceded by a single-width
+ * code point effectively making the latter
+ * double-width.
+ */
+ } else {
+ /* Otherwise zero-width code points are ignored */
+ goto out;
+ }
+ }

Please extract this width evaluation into a separate function.

...
--- a/include/linux/consolemap.h
+++ b/include/linux/consolemap.h
...
@@ -63,6 +68,11 @@ static inline bool ucs_is_double_width(uint32_t cp)
{
return false;
}
+
+static inline bool ucs_is_zero_width(uint32_t cp)
+{
+ return false;
+}

Again, is this necessary?

thanks,
--
js
suse labs