[PATCH] x86/iopl: Factor out IO-bitmap related TSS fields into 'struct x86_io_bitmap'

From: Ingo Molnar
Date: Tue Nov 12 2019 - 02:41:09 EST



* Thomas Gleixner <tglx@xxxxxxxxxxxxx> wrote:

> This is the second version of the attempt to confine the unwanted side
> effects of iopl(). The first version of this series can be found here:
>
> https://lore.kernel.org/r/20191106193459.581614484@xxxxxxxxxxxxx
>
> The V1 cover letter also contains a longer variant of the
> background. Summary:
>
> iopl(level = 3) enables not only access to all 65536 I/O ports but also
> the use of CLI/STI in user space.
>
> Disabling interrupts in user space can lead to system lockups and breaks
> assumptions in the kernel that userspace always runs with interrupts
> enabled.
>
> iopl() is often preferred over ioperm() as it avoids the overhead of
> copying the task's I/O bitmap to the TSS bitmap on context switch. This
> overhead can be avoided by providing an all-zeroes bitmap in the TSS and
> switching the TSS bitmap offset to this permit-all I/O bitmap. It's
> marginally slower than iopl(), which is a one-time setup, but prevents
> the use of CLI/STI in user space.
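
To make that concrete: with the two bitmaps and the offset macros visible in
the patch below, emulating iopl(3) is only a matter of which map
x86_tss.io_bitmap_base points at. A minimal sketch (the helper name here is
made up for illustration, not taken from the series):

  /* Sketch only: point the CPU at the all-zeroes map instead of copying. */
  static inline void tss_use_permit_all_map(struct tss_struct *tss)
  {
          /*
           * All bytes zero => all 65536 ports permitted. IOPL stays 0,
           * so CLI/STI in user space still trap.
           */
          tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_VALID_ALL;
  }
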
>
> The changes vs. V1:
>
> - Fix the reported fallout on 32bit (0-day/Ingo)
>
> - Implement a sequence count based conditional update (Linus)
>
> - Drop the copy optimization
>
> - Move the bitmap copying out of the context switch into the exit to
> user mode machinery. The context switch merely invalidates the TSS
> bitmap offset when a task using an I/O bitmap gets scheduled out.
>
> - Move all bitmap information into a data structure to avoid adding
> more fields to thread_struct.
>
> - Add a refcount so the potentially pointless duplication of the bitmap
> at fork can be avoided.
>
> - Better sharing of update functions (Andy)
>
> - More updates to self tests to verify the share/unshare mechanism and
> the restore of an I/O bitmap when iopl() permissions are dropped.
>
> - Pick up a few acked/reviewed-by tags as applicable
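
Side note on the context-switch vs. exit-to-user split above: the
schedule-out half is now trivial, roughly like this sketch (the helper name
is invented here; only the IO_BITMAP_OFFSET_INVALID value, also used in the
patch below, is real):

  static inline void tss_invalidate_io_bitmap(struct tss_struct *tss)
  {
          /*
           * Move the bitmap offset outside the TSS limit so any port
           * access raises #GP. The stale copy of the previous task's
           * bitmap stays in place; tss_update_io_bitmap() refreshes it
           * on the way back to user space, and only when the sequence
           * check says it is out of date.
           */
          tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID;
  }
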

> 23 files changed, 614 insertions(+), 459 deletions(-)

Ok, this new version is much easier on the eyes.

There's now a bigger collection of tss_struct fields related to the
IO-bitmap, and the organization of those fields is still a bit
idiosyncratic - for example tss->last_sequence doesn't tell us that it's
bitmap-related.

How about the patch below?

It reorganizes all those fields into a new container structure, 'struct
x86_io_bitmap', adds it as tss.io_bitmap, and uses the prefix to shorten
and organize the names of the fields:

tss.last_bitmap        =>  tss.io_bitmap.last_bitmap
tss.last_sequence      =>  tss.io_bitmap.last_sequence
tss.io_bitmap_prev_max =>  tss.io_bitmap.prev_max
tss.io_bitmap_bytes    =>  tss.io_bitmap.map_bytes
tss.io_bitmap_all      =>  tss.io_bitmap.map_all

This makes it far more readable, and keeps the local variable references
short and tidy:

iobm->last_bitmap
iobm->last_sequence
iobm->prev_max
iobm->map_bytes
iobm->map_all
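
For example, tss_copy_io_bitmap() could be written with such a local (a
sketch only - the patch below spells out tss->io_bitmap.* directly and uses
'iobm' for the per-task struct io_bitmap, here renamed to 'src'):

  static void tss_copy_io_bitmap(struct tss_struct *tss, struct io_bitmap *src)
  {
          struct x86_io_bitmap *iobm = &tss->io_bitmap;

          memcpy(iobm->map_bytes, src->bitmap_bytes,
                 max(iobm->prev_max, src->io_bitmap_max));

          iobm->prev_max      = src->io_bitmap_max;
          iobm->last_sequence = src->sequence;
          iobm->last_bitmap   = src;
  }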

Only build tested.

Thanks,

Ingo

arch/x86/include/asm/processor.h | 38 +++++++++++++++++++++++---------------
arch/x86/kernel/cpu/common.c     |  6 +++---
arch/x86/kernel/process.c        | 14 +++++++-------
3 files changed, 33 insertions(+), 25 deletions(-)

diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 933f0b9b1cd7..4358ae63c252 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -333,11 +333,11 @@ struct x86_hw_tss {
#define IO_BITMAP_LONGS (IO_BITMAP_BYTES / sizeof(long))

#define IO_BITMAP_OFFSET_VALID_MAP \
- (offsetof(struct tss_struct, io_bitmap_bytes) - \
+ (offsetof(struct tss_struct, io_bitmap.map_bytes) - \
offsetof(struct tss_struct, x86_tss))

#define IO_BITMAP_OFFSET_VALID_ALL \
- (offsetof(struct tss_struct, io_bitmap_all) - \
+ (offsetof(struct tss_struct, io_bitmap.map_all) - \
offsetof(struct tss_struct, x86_tss))

/*
@@ -361,14 +361,11 @@ struct entry_stack_page {
struct entry_stack stack;
} __aligned(PAGE_SIZE);

-struct tss_struct {
- /*
- * The fixed hardware portion. This must not cross a page boundary
- * at risk of violating the SDM's advice and potentially triggering
- * errata.
- */
- struct x86_hw_tss x86_tss;
-
+/*
+ * All IO bitmap related data stored in the TSS:
+ */
+struct x86_io_bitmap
+{
/*
* The bitmap pointer and the sequence number of the last active
* bitmap. last_bitmap cannot be dereferenced. It's solely for
@@ -384,7 +381,7 @@ struct tss_struct {
* outside of the TSS limit. So for sane tasks there is no need to
* actually touch the io_bitmap at all.
*/
- unsigned int io_bitmap_prev_max;
+ unsigned int prev_max;

/*
* The extra 1 is there because the CPU will access an
@@ -392,14 +389,25 @@ struct tss_struct {
* bitmap. The extra byte must be all 1 bits, and must
* be within the limit.
*/
- unsigned char io_bitmap_bytes[IO_BITMAP_BYTES + 1]
- __aligned(sizeof(unsigned long));
+ unsigned char map_bytes[IO_BITMAP_BYTES + 1] __aligned(sizeof(unsigned long));
+
/*
* Special I/O bitmap to emulate IOPL(3). All bytes zero,
* except the additional byte at the end.
*/
- unsigned char io_bitmap_all[IO_BITMAP_BYTES + 1]
- __aligned(sizeof(unsigned long));
+ unsigned char map_all[IO_BITMAP_BYTES + 1] __aligned(sizeof(unsigned long));
+};
+
+struct tss_struct {
+ /*
+ * The fixed hardware portion. This must not cross a page boundary
+ * at risk of violating the SDM's advice and potentially triggering
+ * errata.
+ */
+ struct x86_hw_tss x86_tss;
+
+ struct x86_io_bitmap io_bitmap;
+
} __aligned(PAGE_SIZE);

DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index dfbe6fce04f3..eea0e3170de4 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1861,9 +1861,9 @@ void cpu_init(void)
/* Initialize the TSS. */
tss_setup_ist(tss);
tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID;
- tss->last_bitmap = NULL;
- tss->io_bitmap_prev_max = 0;
- memset(tss->io_bitmap_bytes, 0xff, sizeof(tss->io_bitmap_bytes));
+ tss->io_bitmap.last_bitmap = NULL;
+ tss->io_bitmap.prev_max = 0;
+ memset(tss->io_bitmap.map_bytes, 0xff, sizeof(tss->io_bitmap.map_bytes));
set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);

load_TR_desc();
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ccb48f4dab75..8179f3ee6a55 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -350,16 +350,16 @@ static void tss_copy_io_bitmap(struct tss_struct *tss, struct io_bitmap *iobm)
* permitted, then the copy needs to cover those as well so they
* get turned off.
*/
- memcpy(tss->io_bitmap_bytes, iobm->bitmap_bytes,
- max(tss->io_bitmap_prev_max, iobm->io_bitmap_max));
+ memcpy(tss->io_bitmap.map_bytes, iobm->bitmap_bytes,
+ max(tss->io_bitmap.prev_max, iobm->io_bitmap_max));

/*
* Store the new max and the sequence number of this bitmap
* and a pointer to the bitmap itself.
*/
- tss->io_bitmap_prev_max = iobm->io_bitmap_max;
- tss->last_sequence = iobm->sequence;
- tss->last_bitmap = iobm;
+ tss->io_bitmap.prev_max = iobm->io_bitmap_max;
+ tss->io_bitmap.last_sequence = iobm->sequence;
+ tss->io_bitmap.last_bitmap = iobm;
}

/**
@@ -388,8 +388,8 @@ void tss_update_io_bitmap(void)
* sequence number differs. The update time is
* accounted to the incoming task.
*/
- if (tss->last_bitmap != iobm ||
- tss->last_sequence != iobm->sequence)
+ if (tss->io_bitmap.last_bitmap != iobm ||
+ tss->io_bitmap.last_sequence != iobm->sequence)
tss_copy_io_bitmap(tss, iobm);

/* Enable the bitmap */