[RFC 4/4] trace_uprobe: Fix multiple update of same semaphores
From: Ravi Bangoria
Date: Wed Feb 28 2018 - 02:52:14 EST
For tiny binaries/libraries, different mmap regions points to
the same file portion. In such cases, we may increment semaphore
multiple times. But while de-registration, semaphore will get
decremented only once, leaving semaphore > 0 even if no one is
tracing on that marker.
Ensure increment and decrement happens in sync by keeping list
of mms in trace_uprobe. Increment semaphore only if mm is not
present in the list and decrement only if mm is present in the
list.
Example
# echo "p:sdt_tick/loop2 /tmp/tick:0x6e4 *0x10036" > uprobe_events
Before patch:
# echo 1 > events/sdt_tick/loop2/enable
# ./Workspace/sdt_prog/tick &
# dd if=/proc//mem bs=1 count=1 skip=268566582 2>/dev/null | xxd
0000000: 02 .
# echo 0 > events/sdt_tick/loop2/enable
# dd if=/proc//mem bs=1 count=1 skip=268566582 2>/dev/null | xxd
0000000: 01 .
After patch:
# echo 1 > events/sdt_tick/loop2/enable
# ./Workspace/sdt_prog/tick &
# dd if=/proc//mem bs=1 count=1 skip=268566582 2>/dev/null | xxd
0000000: 01 .
# echo 0 > events/sdt_tick/loop2/enable
# dd if=/proc//mem bs=1 count=1 skip=268566582 2>/dev/null | xxd
0000000: 00 .
Signed-off-by: Ravi Bangoria <ravi.bangoria@xxxxxxxxxxxxxxxxxx>
---
kernel/trace/trace_uprobe.c | 105 ++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 102 insertions(+), 3 deletions(-)
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index d14aafc..3f1e8bd 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -49,6 +49,11 @@ struct trace_uprobe_filter {
struct list_head perf_events;
};
+struct sdt_mm_list {
+ struct mm_struct *mm;
+ struct sdt_mm_list *next;
+};
+
/*
* uprobe event core functions
*/
@@ -60,6 +65,8 @@ struct trace_uprobe {
char *filename;
unsigned long offset;
unsigned long sdt_offset; /* sdt semaphore offset */
+ struct sdt_mm_list *sml;
+ struct rw_semaphore sml_rw_sem;
unsigned long nhit;
struct trace_probe tp;
};
@@ -273,6 +280,7 @@ static inline bool is_ret_probe(struct trace_uprobe *tu)
if (is_ret)
tu->consumer.ret_handler = uretprobe_dispatcher;
init_trace_uprobe_filter(&tu->filter);
+ init_rwsem(&tu->sml_rw_sem);
return tu;
error:
@@ -953,6 +961,75 @@ static bool sdt_valid_vma(struct trace_uprobe *tu, struct vm_area_struct *vma)
return 0;
}
+static bool sdt_check_mm_list(struct trace_uprobe *tu, struct mm_struct *mm)
+{
+ struct sdt_mm_list *tmp = tu->sml;
+
+ if (!tu->sml || !mm)
+ return false;
+
+ while (tmp) {
+ if (tmp->mm == mm)
+ return true;
+ tmp = tmp->next;
+ }
+
+ return false;
+}
+
+static void sdt_add_mm_list(struct trace_uprobe *tu, struct mm_struct *mm)
+{
+ struct sdt_mm_list *tmp;
+
+ tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+ if (!tmp) {
+ pr_info("sdt_add_mm_list failed.\n");
+ return;
+ }
+ tmp->mm = mm;
+ tmp->next = tu->sml;
+ tu->sml = tmp;
+}
+
+static void sdt_del_mm_list(struct trace_uprobe *tu, struct mm_struct *mm)
+{
+ struct sdt_mm_list *prev, *curr;
+
+ if (!tu->sml)
+ return;
+
+ if (tu->sml->mm == mm) {
+ curr = tu->sml;
+ tu->sml = tu->sml->next;
+ kfree(curr);
+ return;
+ }
+
+ prev = tu->sml;
+ curr = tu->sml->next;
+ while (curr) {
+ if (curr->mm == mm) {
+ prev->next = curr->next;
+ kfree(curr);
+ return;
+ }
+ prev = curr;
+ curr = curr->next;
+ }
+}
+
+static void sdt_flush_mm_list(struct trace_uprobe *tu)
+{
+ struct sdt_mm_list *next, *curr = tu->sml;
+
+ while (curr) {
+ next = curr->next;
+ kfree(curr);
+ curr = next;
+ }
+ tu->sml = NULL;
+}
+
/*
* TODO: Adding this defination in include/linux/uprobes.h throws
* warnings about address_sapce. Adding it here for the time being.
@@ -970,20 +1047,26 @@ static void sdt_increment_sem(struct trace_uprobe *tu)
if (IS_ERR(info))
goto out;
+ down_write(&tu->sml_rw_sem);
while (info) {
down_write(&info->mm->mmap_sem);
vma = sdt_find_vma(info->mm, tu);
if (!vma)
goto cont;
+ if (sdt_check_mm_list(tu, info->mm))
+ goto cont;
+
vaddr = offset_to_vaddr(vma, tu->sdt_offset);
- sdt_update_sem(info->mm, vaddr, 1);
+ if (!sdt_update_sem(info->mm, vaddr, 1))
+ sdt_add_mm_list(tu, info->mm);
cont:
up_write(&info->mm->mmap_sem);
mmput(info->mm);
info = free_uprobe_map_info(info);
}
+ up_write(&tu->sml_rw_sem);
out:
uprobe_end_dup_mmap();
@@ -1001,8 +1084,16 @@ void trace_uprobe_mmap_callback(struct vm_area_struct *vma)
!trace_probe_is_enabled(&tu->tp))
continue;
+ down_write(&tu->sml_rw_sem);
+ if (sdt_check_mm_list(tu, vma->vm_mm))
+ goto cont;
+
vaddr = offset_to_vaddr(vma, tu->sdt_offset);
- sdt_update_sem(vma->vm_mm, vaddr, 1);
+ if (!sdt_update_sem(vma->vm_mm, vaddr, 1))
+ sdt_add_mm_list(tu, vma->vm_mm);
+
+cont:
+ up_write(&tu->sml_rw_sem);
}
mutex_unlock(&uprobe_lock);
}
@@ -1017,7 +1108,11 @@ static void sdt_decrement_sem(struct trace_uprobe *tu)
if (IS_ERR(info))
return;
+ down_write(&tu->sml_rw_sem);
while (info) {
+ if (!sdt_check_mm_list(tu, info->mm))
+ goto cont;
+
down_write(&info->mm->mmap_sem);
vma = sdt_find_vma(info->mm, tu);
if (vma) {
@@ -1025,10 +1120,14 @@ static void sdt_decrement_sem(struct trace_uprobe *tu)
sdt_update_sem(info->mm, vaddr, -1);
}
up_write(&info->mm->mmap_sem);
-
+ sdt_del_mm_list(tu, info->mm);
+
+cont:
mmput(info->mm);
info = free_uprobe_map_info(info);
}
+ sdt_flush_mm_list(tu);
+ up_write(&tu->sml_rw_sem);
}
typedef bool (*filter_func_t)(struct uprobe_consumer *self,
--
1.8.3.1