Re: [PATCH 3/6] i386 virtualization - Make ldt a desc struct

From: Chris Wright
Date: Tue Aug 16 2005 - 00:24:23 EST


* zach@xxxxxxxxxx (zach@xxxxxxxxxx) wrote:
> Make the LDT a desc_struct pointer, since this is what it actually is.

I like that plan.

> There is code which relies on the fact that LDTs are allocated in page
> chunks, and it is both cleaner and more convenient to keep the rather
> poorly named "size" variable from the LDT in terms of LDT pages.

I noticed it's replaced by context.ldt and context.ldt_pages, which
appear to be decoupling the overloaded use from before. Looks sane.
More comments below.

> Signed-off-by: Zachary Amsden <zach@xxxxxxxxxx>
> Index: linux-2.6.13/include/asm-i386/mmu.h
> ===================================================================
> --- linux-2.6.13.orig/include/asm-i386/mmu.h 2005-08-15 11:16:59.000000000 -0700
> +++ linux-2.6.13/include/asm-i386/mmu.h 2005-08-15 11:19:49.000000000 -0700
> @@ -9,9 +9,9 @@
> * cpu_vm_mask is used to optimize ldt flushing.
> */
> typedef struct {
> - int size;
> struct semaphore sem;
> - void *ldt;
> + struct desc_struct *ldt;
> + int ldt_pages;
> } mm_context_t;
>
> #endif
> Index: linux-2.6.13/include/asm-i386/desc.h
> ===================================================================
> --- linux-2.6.13.orig/include/asm-i386/desc.h 2005-08-15 11:16:59.000000000 -0700
> +++ linux-2.6.13/include/asm-i386/desc.h 2005-08-15 11:19:49.000000000 -0700
> @@ -6,6 +6,9 @@
>
> #define CPU_16BIT_STACK_SIZE 1024
>
> +/* The number of LDT entries per page */
> +#define LDT_ENTRIES_PER_PAGE (PAGE_SIZE / LDT_ENTRY_SIZE)
> +
> #ifndef __ASSEMBLY__
>
> #include <linux/preempt.h>
> @@ -30,7 +33,7 @@
> static inline unsigned long get_desc_base(struct desc_struct *desc)
> {
> unsigned long base;
> - base = ((desc->a >> 16) & 0x0000ffff) |
> + base = (desc->a >> 16) |

Seemingly unrelated.

> ((desc->b << 16) & 0x00ff0000) |
> (desc->b & 0xff000000);
> return base;
> @@ -171,7 +174,7 @@
> static inline void load_LDT_nolock(mm_context_t *pc, int cpu)
> {
> void *segments = pc->ldt;
> - int count = pc->size;
> + int count = pc->ldt_pages * LDT_ENTRIES_PER_PAGE;
>
> if (likely(!count)) {
> segments = &default_ldt[0];
> Index: linux-2.6.13/include/asm-i386/mmu_context.h
> ===================================================================
> --- linux-2.6.13.orig/include/asm-i386/mmu_context.h 2005-08-15 11:16:59.000000000 -0700
> +++ linux-2.6.13/include/asm-i386/mmu_context.h 2005-08-15 11:19:49.000000000 -0700
> @@ -19,7 +19,7 @@
> memset(&mm->context, 0, sizeof(mm->context));
> init_MUTEX(&mm->context.sem);
> old_mm = current->mm;
> - if (old_mm && unlikely(old_mm->context.size > 0)) {
> + if (old_mm && unlikely(old_mm->context.ldt)) {
> retval = copy_ldt(&mm->context, &old_mm->context);
> }
> if (retval == 0)
> @@ -32,7 +32,7 @@
> */
> static inline void destroy_context(struct mm_struct *mm)
> {
> - if (unlikely(mm->context.size))
> + if (unlikely(mm->context.ldt))
> destroy_ldt(mm);
> del_lazy_mm(mm);
> }
> Index: linux-2.6.13/include/asm-i386/mach-default/mach_desc.h
> ===================================================================
> --- linux-2.6.13.orig/include/asm-i386/mach-default/mach_desc.h 2005-08-15 11:16:59.000000000 -0700
> +++ linux-2.6.13/include/asm-i386/mach-default/mach_desc.h 2005-08-15 11:19:49.000000000 -0700
> @@ -62,11 +62,10 @@
> _set_tssldt_desc(&get_cpu_gdt_table(cpu)[GDT_ENTRY_LDT], (int)addr, ((size << 3)-1), 0x82);
> }
>
> -static inline int write_ldt_entry(void *ldt, int entry, __u32 entry_a, __u32 entry_b)
> +static inline int write_ldt_entry(struct desc_struct *ldt, int entry, __u32 entry_a, __u32 entry_b)
> {
> - __u32 *lp = (__u32 *)((char *)ldt + entry*8);
> - *lp = entry_a;
> - *(lp+1) = entry_b;
> + ldt[entry].a = entry_a;
> + ldt[entry].b = entry_b;
> return 0;
> }
>
> Index: linux-2.6.13/arch/i386/kernel/ptrace.c
> ===================================================================
> --- linux-2.6.13.orig/arch/i386/kernel/ptrace.c 2005-08-15 11:16:59.000000000 -0700
> +++ linux-2.6.13/arch/i386/kernel/ptrace.c 2005-08-15 11:19:49.000000000 -0700
> @@ -164,18 +164,20 @@
> * and APM bios ones we just ignore here.
> */
> if (segment_from_ldt(seg)) {
> - u32 *desc;
> + mm_context_t *context;
> + struct desc_struct *desc;
> unsigned long base;
>
> - down(&child->mm->context.sem);
> - desc = child->mm->context.ldt + (seg & ~7);
> - base = (desc[0] >> 16) | ((desc[1] & 0xff) << 16) | (desc[1] & 0xff000000);
> + context = &child->mm->context;
> + down(&context->sem);
> + desc = &context->ldt[segment_index(seg)];
> + base = get_desc_base(desc);
>
> /* 16-bit code segment? */
> - if (!((desc[1] >> 22) & 1))
> + if (!get_desc_32bit(desc))
> addr &= 0xffff;
> addr += base;
> - up(&child->mm->context.sem);
> + up(&context->sem);
> }
> return addr;
> }
> Index: linux-2.6.13/arch/i386/kernel/ldt.c
> ===================================================================
> --- linux-2.6.13.orig/arch/i386/kernel/ldt.c 2005-08-15 11:16:59.000000000 -0700
> +++ linux-2.6.13/arch/i386/kernel/ldt.c 2005-08-15 11:19:49.000000000 -0700
> @@ -28,28 +28,27 @@
> }
> #endif
>
> -static inline int alloc_ldt(mm_context_t *pc, const int oldsize, int mincount, const int reload)
> +static inline int alloc_ldt(mm_context_t *pc, const int old_pages, int new_pages, const int reload)
> {
> - void *oldldt;
> - void *newldt;
> + struct desc_struct *oldldt;
> + struct desc_struct *newldt;
>

Not quite related here (since change was introduced in earlier patch),
but old alloc_ldt special cased when room was available. This is gone,
so am I reading this correctly, each time through it will allocate a
new one, and free the old one (if it existed)? Just double checking
that change doesn't introduce possible mem leak.

> - mincount = (mincount+511)&(~511);
> - if (mincount*LDT_ENTRY_SIZE > PAGE_SIZE)
> - newldt = vmalloc(mincount*LDT_ENTRY_SIZE);
> + if (new_pages > 1)
> + newldt = vmalloc(new_pages*PAGE_SIZE);
> else
> - newldt = kmalloc(mincount*LDT_ENTRY_SIZE, GFP_KERNEL);
> + newldt = kmalloc(PAGE_SIZE, GFP_KERNEL);

If so, then full page is likely to be reusable in common case, no? (If
there's such a thing as LDT common case ;-)

> if (!newldt)
> return -ENOMEM;
>
> - if (oldsize)
> - memcpy(newldt, pc->ldt, oldsize*LDT_ENTRY_SIZE);
> + if (old_pages)
> + memcpy(newldt, pc->ldt, old_pages*PAGE_SIZE);
> oldldt = pc->ldt;
> if (reload)
> - memset(newldt+oldsize*LDT_ENTRY_SIZE, 0, (mincount-oldsize)*LDT_ENTRY_SIZE);
> + memset(newldt+old_pages*LDT_ENTRIES_PER_PAGE, 0, (new_pages-old_pages)*PAGE_SIZE);

In fact, I _think_ this causes a problem. Who says newldt is bigger
than old one? This looks like user-triggerable oops to me.

> pc->ldt = newldt;
> wmb();
> - pc->size = mincount;
> + pc->ldt_pages = new_pages;
> wmb();
>
> /*
> @@ -60,20 +59,20 @@
> #ifdef CONFIG_SMP
> cpumask_t mask;
> preempt_disable();
> - SetPagesLDT(pc->ldt, (pc->size * LDT_ENTRY_SIZE) / PAGE_SIZE);
> + SetPagesLDT(pc->ldt, pc->ldt_pages);
> load_LDT(pc);
> mask = cpumask_of_cpu(smp_processor_id());
> if (!cpus_equal(current->mm->cpu_vm_mask, mask))
> smp_call_function(flush_ldt, NULL, 1, 1);
> preempt_enable();
> #else
> - SetPagesLDT(pc->ldt, (pc->size * LDT_ENTRY_SIZE) / PAGE_SIZE);
> + SetPagesLDT(pc->ldt, pc->ldt_pages);
> load_LDT(pc);
> #endif
> }
> - if (oldsize) {
> - ClearPagesLDT(oldldt, (oldsize * LDT_ENTRY_SIZE) / PAGE_SIZE);
> - if (oldsize*LDT_ENTRY_SIZE > PAGE_SIZE)
> + if (old_pages) {
> + ClearPagesLDT(oldldt, old_pages);
> + if (old_pages > 1)
> vfree(oldldt);
> else
> kfree(oldldt);
> @@ -86,10 +85,10 @@
> int err;
>
> down(&old->sem);
> - err = alloc_ldt(new, 0, old->size, 0);
> + err = alloc_ldt(new, 0, old->ldt_pages, 0);
> if (!err) {
> - memcpy(new->ldt, old->ldt, old->size*LDT_ENTRY_SIZE);
> - SetPagesLDT(new->ldt, (new->size * LDT_ENTRY_SIZE) / PAGE_SIZE);
> + memcpy(new->ldt, old->ldt, old->ldt_pages*PAGE_SIZE);
> + SetPagesLDT(new->ldt, new->ldt_pages);
> }
> up(&old->sem);
> return err;
> @@ -97,14 +96,16 @@
>
> void destroy_ldt(struct mm_struct *mm)
> {
> + int pages = mm->context.ldt_pages;
> +
> if (mm == current->active_mm)
> clear_LDT();
> - ClearPagesLDT(mm->context.ldt, (mm->context.size * LDT_ENTRY_SIZE) / PAGE_SIZE);
> - if (mm->context.size*LDT_ENTRY_SIZE > PAGE_SIZE)
> + ClearPagesLDT(mm->context.ldt, pages);
> + if (pages > 1)
> vfree(mm->context.ldt);
> else
> kfree(mm->context.ldt);
> - mm->context.size = 0;
> + mm->context.ldt_pages = 0;
> }
>
> static int read_ldt(void __user * ptr, unsigned long bytecount)
> @@ -113,13 +114,13 @@
> unsigned long size;
> struct mm_struct * mm = current->mm;
>
> - if (!mm->context.size)
> + if (!mm->context.ldt_pages)
> return 0;
> if (bytecount > LDT_ENTRY_SIZE*LDT_ENTRIES)
> bytecount = LDT_ENTRY_SIZE*LDT_ENTRIES;
>
> down(&mm->context.sem);
> - size = mm->context.size*LDT_ENTRY_SIZE;
> + size = mm->context.ldt_pages*PAGE_SIZE;
> if (size > bytecount)
> size = bytecount;

This now looks like you can leak data? Since full page is unlikely
used, but accounting is done in page sizes. Asking to read_ldt with
bytcount of PAGE_SIZE could give some uninitialzed data back to user.
Did I miss the spot where this is always zero-filled?

> @@ -166,6 +167,7 @@
> __u32 entry_1, entry_2;
> int error;
> struct user_desc ldt_info;
> + int page_number;
>
> error = -EINVAL;
> if (bytecount != sizeof(ldt_info))
> @@ -184,10 +186,11 @@
> goto out;
> }
>
> + page_number = ldt_info.entry_number / LDT_ENTRIES_PER_PAGE;
> down(&mm->context.sem);
> - if (ldt_info.entry_number >= mm->context.size) {
> - error = alloc_ldt(&current->mm->context, mm->context.size,
> - ldt_info.entry_number+1, 1);
> + if (page_number >= mm->context.ldt_pages) {
> + error = alloc_ldt(&current->mm->context, mm->context.ldt_pages,
> + page_number+1, 1);
> if (error < 0)
> goto out_unlock;
> }
> Index: linux-2.6.13/arch/i386/kernel/kprobes.c
> ===================================================================
> --- linux-2.6.13.orig/arch/i386/kernel/kprobes.c 2005-08-15 11:16:59.000000000 -0700
> +++ linux-2.6.13/arch/i386/kernel/kprobes.c 2005-08-15 11:19:49.000000000 -0700
> @@ -164,8 +164,7 @@
> */
> if (segment_from_ldt(regs->xcs) && (current->mm)) {
> struct desc_struct *desc;
> - desc = (struct desc_struct *) ((char *) current->mm->context.ldt +
> - (segment_index(regs->xcs) * 8));
> + desc = &current->mm->context.ldt[segment_index(regs->xcs)];
> addr = (kprobe_opcode_t *) (get_desc_base(desc) + regs->eip -
> sizeof(kprobe_opcode_t));
> } else {
>
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/