[PATCH v5 3.1.0-rc4-tip 18/26] uprobes: slot allocation.

From: Srikar Dronamraju
Date: Tue Sep 20 2011 - 08:17:20 EST



One page of slots are allocated per mm.
On a probehit one free slot is acquired and released after
singlestep operation completes.

Signed-off-by: Jim Keniston <jkenisto@xxxxxxxxxx>
Signed-off-by: Srikar Dronamraju <srikar@xxxxxxxxxxxxxxxxxx>
---
include/linux/mm_types.h | 2
include/linux/uprobes.h | 22 ++++
kernel/fork.c | 2
kernel/uprobes.c | 246 +++++++++++++++++++++++++++++++++++++++++++++-
4 files changed, 267 insertions(+), 5 deletions(-)

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 9aeb64f..aa2e427 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -12,6 +12,7 @@
#include <linux/completion.h>
#include <linux/cpumask.h>
#include <linux/page-debug-flags.h>
+#include <linux/uprobes.h>
#include <asm/page.h>
#include <asm/mmu.h>

@@ -351,6 +352,7 @@ struct mm_struct {
#endif
#ifdef CONFIG_UPROBES
atomic_t mm_uprobes_count;
+ struct uprobes_xol_area *uprobes_xol_area;
#endif
};

diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
index 30576fa..a407d17 100644
--- a/include/linux/uprobes.h
+++ b/include/linux/uprobes.h
@@ -92,6 +92,27 @@ struct uprobe_task {
struct uprobe *active_uprobe;
};

+/*
+ * On a breakpoint hit, thread contests for a slot. It free the
+ * slot after singlestep. Only definite number of slots are
+ * allocated.
+ */
+
+struct uprobes_xol_area {
+ spinlock_t slot_lock; /* protects bitmap and slot (de)allocation*/
+ wait_queue_head_t wq; /* if all slots are busy */
+ atomic_t slot_count; /* currently in use slots */
+ unsigned long *bitmap; /* 0 = free slot */
+ struct page *page;
+
+ /*
+ * We keep the vma's vm_start rather than a pointer to the vma
+ * itself. The probed process or a naughty kernel module could make
+ * the vma go away, and we must handle that reasonably gracefully.
+ */
+ unsigned long vaddr; /* Page(s) of instruction slots */
+};
+
#ifdef CONFIG_UPROBES
extern int __weak set_bkpt(struct task_struct *tsk, struct uprobe *uprobe,
unsigned long vaddr);
@@ -105,6 +126,7 @@ extern int register_uprobe(struct inode *inode, loff_t offset,
extern void unregister_uprobe(struct inode *inode, loff_t offset,
struct uprobe_consumer *consumer);
extern void free_uprobe_utask(struct task_struct *tsk);
+extern void free_uprobes_xol_area(struct mm_struct *mm);
extern int mmap_uprobe(struct vm_area_struct *vma);
extern void munmap_uprobe(struct vm_area_struct *vma);
extern unsigned long __weak get_uprobe_bkpt_addr(struct pt_regs *regs);
diff --git a/kernel/fork.c b/kernel/fork.c
index 5914bc1..088a27c 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -557,6 +557,7 @@ void mmput(struct mm_struct *mm)
might_sleep();

if (atomic_dec_and_test(&mm->mm_users)) {
+ free_uprobes_xol_area(mm);
exit_aio(mm);
ksm_exit(mm);
khugepaged_exit(mm); /* must run before exit_mmap */
@@ -744,6 +745,7 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
#ifdef CONFIG_UPROBES
atomic_set(&mm->mm_uprobes_count,
atomic_read(&oldmm->mm_uprobes_count));
+ mm->uprobes_xol_area = NULL;
#endif

if (!mm_init(mm, tsk))
diff --git a/kernel/uprobes.c b/kernel/uprobes.c
index 083c577..ca1f622 100644
--- a/kernel/uprobes.c
+++ b/kernel/uprobes.c
@@ -31,8 +31,13 @@
#include <linux/swap.h> /* try_to_free_swap */
#include <linux/ptrace.h> /* user_enable_single_step */
#include <linux/kdebug.h> /* notifier mechanism */
+#include <linux/mman.h> /* PROT_EXEC, MAP_PRIVATE */
+#include <linux/init_task.h> /* init_cred */
#include <linux/uprobes.h>

+#define UINSNS_PER_PAGE (PAGE_SIZE/UPROBES_XOL_SLOT_BYTES)
+#define MAX_UPROBES_XOL_SLOTS UINSNS_PER_PAGE
+
static struct rb_root uprobes_tree = RB_ROOT;
static DEFINE_SPINLOCK(uprobes_treelock); /* serialize (un)register */
static DEFINE_MUTEX(uprobes_mmap_mutex); /* uprobe->pending_list */
@@ -49,15 +54,21 @@ struct vma_info {
};

/*
- * valid_vma: Verify if the specified vma is an executable vma
+ * valid_vma: Verify if the specified vma is an executable vma,
+ * but not an XOL vma.
* - Return 1 if the specified virtual address is in an
- * executable vma.
+ * executable vma, but not in an XOL vma.
*/
static bool valid_vma(struct vm_area_struct *vma)
{
+ struct uprobes_xol_area *area = vma->vm_mm->uprobes_xol_area;
+
if (!vma->vm_file)
return false;

+ if (area && (area->vaddr == vma->vm_start))
+ return false;
+
if ((vma->vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)) ==
(VM_READ|VM_EXEC))
return true;
@@ -1034,6 +1045,218 @@ void munmap_uprobe(struct vm_area_struct *vma)
return;
}

+/* Slot allocation for XOL */
+static int xol_add_vma(struct uprobes_xol_area *area)
+{
+ const struct cred *curr_cred;
+ struct vm_area_struct *vma;
+ struct mm_struct *mm;
+ unsigned long addr;
+ int ret = -ENOMEM;
+
+ mm = get_task_mm(current);
+ if (!mm)
+ return -ESRCH;
+
+ down_write(&mm->mmap_sem);
+ if (mm->uprobes_xol_area) {
+ ret = -EALREADY;
+ goto fail;
+ }
+
+ /*
+ * Find the end of the top mapping and skip a page.
+ * If there is no space for PAGE_SIZE above
+ * that, mmap will ignore our address hint.
+ *
+ * override credentials otherwise anonymous memory might
+ * not be granted execute permission when the selinux
+ * security hooks have their way.
+ */
+ vma = rb_entry(rb_last(&mm->mm_rb), struct vm_area_struct, vm_rb);
+ addr = vma->vm_end + PAGE_SIZE;
+ curr_cred = override_creds(&init_cred);
+ addr = do_mmap_pgoff(NULL, addr, PAGE_SIZE, PROT_EXEC, MAP_PRIVATE, 0);
+ revert_creds(curr_cred);
+
+ if (addr & ~PAGE_MASK)
+ goto fail;
+ vma = find_vma(mm, addr);
+
+ /* Don't expand vma on mremap(). */
+ vma->vm_flags |= VM_DONTEXPAND | VM_DONTCOPY;
+ area->vaddr = vma->vm_start;
+ if (get_user_pages(current, mm, area->vaddr, 1, 1, 1, &area->page,
+ &vma) > 0)
+ ret = 0;
+
+fail:
+ up_write(&mm->mmap_sem);
+ mmput(mm);
+ return ret;
+}
+
+/*
+ * xol_alloc_area - Allocate process's uprobes_xol_area.
+ * This area will be used for storing instructions for execution out of
+ * line.
+ *
+ * Returns the allocated area or NULL.
+ */
+static struct uprobes_xol_area *xol_alloc_area(void)
+{
+ struct uprobes_xol_area *area = NULL;
+
+ area = kzalloc(sizeof(*area), GFP_KERNEL);
+ if (unlikely(!area))
+ return NULL;
+
+ area->bitmap = kzalloc(BITS_TO_LONGS(UINSNS_PER_PAGE) * sizeof(long),
+ GFP_KERNEL);
+
+ if (!area->bitmap)
+ goto fail;
+
+ init_waitqueue_head(&area->wq);
+ spin_lock_init(&area->slot_lock);
+ if (!xol_add_vma(area) && !current->mm->uprobes_xol_area) {
+ task_lock(current);
+ if (!current->mm->uprobes_xol_area) {
+ current->mm->uprobes_xol_area = area;
+ task_unlock(current);
+ return area;
+ }
+ task_unlock(current);
+ }
+
+fail:
+ kfree(area->bitmap);
+ kfree(area);
+ return current->mm->uprobes_xol_area;
+}
+
+/*
+ * free_uprobes_xol_area - Free the area allocated for slots.
+ */
+void free_uprobes_xol_area(struct mm_struct *mm)
+{
+ struct uprobes_xol_area *area = mm->uprobes_xol_area;
+
+ if (!area)
+ return;
+
+ put_page(area->page);
+ kfree(area->bitmap);
+ kfree(area);
+}
+
+static void xol_wait_event(struct uprobes_xol_area *area)
+{
+ if (atomic_read(&area->slot_count) >= UINSNS_PER_PAGE)
+ wait_event(area->wq,
+ (atomic_read(&area->slot_count) < UINSNS_PER_PAGE));
+}
+
+/*
+ * - search for a free slot.
+ */
+static unsigned long xol_take_insn_slot(struct uprobes_xol_area *area)
+{
+ unsigned long slot_addr, flags;
+ int slot_nr;
+
+ do {
+ spin_lock_irqsave(&area->slot_lock, flags);
+ slot_nr = find_first_zero_bit(area->bitmap, UINSNS_PER_PAGE);
+ if (slot_nr < UINSNS_PER_PAGE) {
+ __set_bit(slot_nr, area->bitmap);
+ slot_addr = area->vaddr +
+ (slot_nr * UPROBES_XOL_SLOT_BYTES);
+ atomic_inc(&area->slot_count);
+ }
+ spin_unlock_irqrestore(&area->slot_lock, flags);
+ if (slot_nr >= UINSNS_PER_PAGE)
+ xol_wait_event(area);
+
+ } while (slot_nr >= UINSNS_PER_PAGE);
+
+ return slot_addr;
+}
+
+/*
+ * xol_get_insn_slot - If was not allocated a slot, then
+ * allocate a slot.
+ * Returns the allocated slot address or 0.
+ */
+static unsigned long xol_get_insn_slot(struct uprobe *uprobe,
+ unsigned long slot_addr)
+{
+ struct uprobes_xol_area *area = current->mm->uprobes_xol_area;
+ unsigned long offset;
+ void *vaddr;
+
+ if (!area) {
+ area = xol_alloc_area();
+ if (!area)
+ return 0;
+ }
+ current->utask->xol_vaddr = xol_take_insn_slot(area);
+
+ /*
+ * Initialize the slot if xol_vaddr points to valid
+ * instruction slot.
+ */
+ if (unlikely(!current->utask->xol_vaddr))
+ return 0;
+
+ current->utask->vaddr = slot_addr;
+ offset = current->utask->xol_vaddr & ~PAGE_MASK;
+ vaddr = kmap_atomic(area->page);
+ memcpy(vaddr + offset, uprobe->insn, MAX_UINSN_BYTES);
+ kunmap_atomic(vaddr);
+ return current->utask->xol_vaddr;
+}
+
+/*
+ * xol_free_insn_slot - If slot was earlier allocated by
+ * @xol_get_insn_slot(), make the slot available for
+ * subsequent requests.
+ */
+static void xol_free_insn_slot(struct task_struct *tsk)
+{
+ struct uprobes_xol_area *area;
+ unsigned long vma_end;
+ unsigned long slot_addr;
+
+ if (!tsk->mm || !tsk->mm->uprobes_xol_area || !tsk->utask)
+ return;
+
+ slot_addr = tsk->utask->xol_vaddr;
+
+ if (unlikely(!slot_addr || IS_ERR_VALUE(slot_addr)))
+ return;
+
+ area = tsk->mm->uprobes_xol_area;
+ vma_end = area->vaddr + PAGE_SIZE;
+ if (area->vaddr <= slot_addr && slot_addr < vma_end) {
+ int slot_nr;
+ unsigned long offset = slot_addr - area->vaddr;
+ unsigned long flags;
+
+ slot_nr = offset / UPROBES_XOL_SLOT_BYTES;
+ if (slot_nr >= UINSNS_PER_PAGE)
+ return;
+
+ spin_lock_irqsave(&area->slot_lock, flags);
+ __clear_bit(slot_nr, area->bitmap);
+ spin_unlock_irqrestore(&area->slot_lock, flags);
+ atomic_dec(&area->slot_count);
+ if (waitqueue_active(&area->wq))
+ wake_up(&area->wq);
+ tsk->utask->xol_vaddr = 0;
+ }
+}
+
/**
* get_uprobe_bkpt_addr - compute address of bkpt given post-bkpt regs
* @regs: Reflects the saved state of the task after it has hit a breakpoint
@@ -1059,6 +1282,7 @@ void free_uprobe_utask(struct task_struct *tsk)
if (utask->active_uprobe)
put_uprobe(utask->active_uprobe);

+ xol_free_insn_slot(tsk);
kfree(utask);
tsk->utask = NULL;
}
@@ -1088,7 +1312,10 @@ static struct uprobe_task *add_utask(void)
static int pre_ssout(struct uprobe *uprobe, struct pt_regs *regs,
unsigned long vaddr)
{
- /* TODO: Yet to be implemented */
+ if (xol_get_insn_slot(uprobe, vaddr) && !pre_xol(uprobe, regs)) {
+ set_instruction_pointer(regs, current->utask->xol_vaddr);
+ return 0;
+ }
return -EFAULT;
}

@@ -1098,8 +1325,16 @@ static int pre_ssout(struct uprobe *uprobe, struct pt_regs *regs,
*/
static bool sstep_complete(struct uprobe *uprobe, struct pt_regs *regs)
{
- /* TODO: Yet to be implemented */
- return false;
+ unsigned long vaddr = instruction_pointer(regs);
+
+ /*
+ * If we have executed out of line, Instruction pointer
+ * cannot be same as virtual address of XOL slot.
+ */
+ if (vaddr == current->utask->xol_vaddr)
+ return false;
+ post_xol(uprobe, regs);
+ return true;
}

/*
@@ -1154,6 +1389,7 @@ void uprobe_notify_resume(struct pt_regs *regs)
utask->active_uprobe = NULL;
utask->state = UTASK_RUNNING;
user_disable_single_step(current);
+ xol_free_insn_slot(current);

/* TODO Stop queueing signals. */
}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/