[GIT PULL] tracing: Updates for 4.8

From: Steven Rostedt
Date: Tue Jul 26 2016 - 18:46:56 EST



Linus,

This is mostly clean ups and small fixes. Some of the more visible
changes are:

. The function pid code uses the event pid filtering logic
. [ku]probe events have access to current->comm
. trace_printk now has sample code
. PCI devices now trace physical addresses
. stack tracing has less unnessary functions traced


Please pull the latest trace-v4.8 tree, which can be found at:


git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace.git
trace-v4.8

Tag SHA1: 50b6c56120fd1bf557fd8771db87c5baf15d57e2
Head SHA1: 78aebca2c955c1c5aeb48e12645e13fe3c3461f2


Andy Lutomirski (1):
tracing: Choose static tp_printk buffer by explicit nesting count

Bjorn Helgaas (1):
tracing: Expose CPU physical addresses (resource values) for PCI devices

Daniel Bristot de Oliveira (4):
tracing: Use outer () on __get_str() definition
tracing, RAS: Cleanup on __get_str() usage
tracing: Use __get_str() when manipulating strings
printk, tracing: Avoiding unneeded blank lines

Joel Fernandes (1):
tracing/function_graph: Fix filters for function_graph threshold

Namhyung Kim (1):
ftrace: Reduce size of function graph entries

Omar Sandoval (1):
tracing: expose current->comm to [ku]probe events

Steven Rostedt (2):
tracing: Make the pid filtering helper functions global
tracing: Move filtered_pid helper functions into trace.c

Steven Rostedt (Red Hat) (7):
tracing: Move the pid_list seq_file functions to be global
tracing: Move pid_list write processing into its own function
ftrace: Have set_ftrace_pid use the bitmap like events do
tracing: Add trace_printk sample code
tracing: Show the preempt count of when the event was called
tracing: Skip more functions when doing stack tracing of events
ftrace: Move toplevel init out of ftrace_init_tracefs()

Tom Zanussi (1):
tracing: Have HIST_TRIGGERS select TRACING

Wei Yongjun (1):
tracing: Using for_each_set_bit() to simplify trace_pid_write()

----
Documentation/trace/kprobetrace.txt | 3 +
Documentation/trace/uprobetracer.txt | 3 +
fs/nfs/nfs4trace.h | 4 +-
fs/nfs/nfstrace.h | 4 +-
include/linux/ftrace.h | 12 +-
include/ras/ras_event.h | 4 +-
include/trace/events/printk.h | 12 +-
include/trace/perf.h | 2 +-
include/trace/trace_events.h | 2 +-
kernel/trace/Kconfig | 1 +
kernel/trace/ftrace.c | 313 ++++++++++++++----------------
kernel/trace/trace.c | 358 +++++++++++++++++++++++++++++------
kernel/trace/trace.h | 48 ++++-
kernel/trace/trace_entries.h | 4 +-
kernel/trace/trace_events.c | 219 +++------------------
kernel/trace/trace_functions.c | 2 +-
kernel/trace/trace_functions_graph.c | 19 +-
kernel/trace/trace_kprobe.c | 1 +
kernel/trace/trace_mmiotrace.c | 10 +-
kernel/trace/trace_probe.c | 33 ++++
kernel/trace/trace_probe.h | 10 +
samples/Kconfig | 7 +
samples/Makefile | 2 +-
samples/trace_printk/Makefile | 6 +
samples/trace_printk/trace-printk.c | 56 ++++++
25 files changed, 667 insertions(+), 468 deletions(-)
create mode 100644 samples/trace_printk/Makefile
create mode 100644 samples/trace_printk/trace-printk.c
---------------------------
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index d68ea5fc812b..ea52ec1f8484 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -40,6 +40,7 @@ Synopsis of kprobe_events
$stackN : Fetch Nth entry of stack (N >= 0)
$stack : Fetch stack address.
$retval : Fetch return value.(*)
+ $comm : Fetch current task comm.
+|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**)
NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
@@ -62,6 +63,8 @@ offset, and container-size (usually 32). The syntax is;

b<bit-width>@<bit-offset>/<container-size>

+For $comm, the default type is "string"; any other type is invalid.
+

Per-Probe Event Filtering
-------------------------
diff --git a/Documentation/trace/uprobetracer.txt b/Documentation/trace/uprobetracer.txt
index f1cf9a34ad9d..72d1cd4f7bf3 100644
--- a/Documentation/trace/uprobetracer.txt
+++ b/Documentation/trace/uprobetracer.txt
@@ -36,6 +36,7 @@ Synopsis of uprobe_tracer
$stackN : Fetch Nth entry of stack (N >= 0)
$stack : Fetch stack address.
$retval : Fetch return value.(*)
+ $comm : Fetch current task comm.
+|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**)
NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
@@ -57,6 +58,8 @@ offset, and container-size (usually 32). The syntax is;

b<bit-width>@<bit-offset>/<container-size>

+For $comm, the default type is "string"; any other type is invalid.
+

Event Profiling
---------------
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 9c150b153782..cfb8f7ce5cf6 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -1235,8 +1235,8 @@ DECLARE_EVENT_CLASS(nfs4_idmap_event,
len = 0;
__entry->error = error < 0 ? error : 0;
__entry->id = id;
- memcpy(__get_dynamic_array(name), name, len);
- ((char *)__get_dynamic_array(name))[len] = 0;
+ memcpy(__get_str(name), name, len);
+ __get_str(name)[len] = 0;
),

TP_printk(
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index 0b9e5cc9a747..31c7763b94d5 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -707,9 +707,9 @@ TRACE_EVENT(nfs_sillyrename_unlink,
__entry->dev = dir->i_sb->s_dev;
__entry->dir = NFS_FILEID(dir);
__entry->error = error;
- memcpy(__get_dynamic_array(name),
+ memcpy(__get_str(name),
data->args.name.name, len);
- ((char *)__get_dynamic_array(name))[len] = 0;
+ __get_str(name)[len] = 0;
),

TP_printk(
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 66a36a815f0a..7d565afe35d2 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -754,23 +754,27 @@ static inline void ftrace_init(void) { }

/*
* Structure that defines an entry function trace.
+ * It's already packed but the attribute "packed" is needed
+ * to remove extra padding at the end.
*/
struct ftrace_graph_ent {
unsigned long func; /* Current function */
int depth;
-};
+} __packed;

/*
* Structure that defines a return function trace.
+ * It's already packed but the attribute "packed" is needed
+ * to remove extra padding at the end.
*/
struct ftrace_graph_ret {
unsigned long func; /* Current function */
- unsigned long long calltime;
- unsigned long long rettime;
/* Number of functions that overran the depth limit for current task */
unsigned long overrun;
+ unsigned long long calltime;
+ unsigned long long rettime;
int depth;
-};
+} __packed;

/* Type of the callback handlers for tracing function graph*/
typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *); /* return */
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index 1443d79e4fe6..1791a12cfa85 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -147,7 +147,7 @@ TRACE_EVENT(mc_event,
__entry->error_count,
mc_event_error_type(__entry->error_type),
__entry->error_count > 1 ? "s" : "",
- ((char *)__get_str(msg))[0] ? " " : "",
+ __get_str(msg)[0] ? " " : "",
__get_str(msg),
__get_str(label),
__entry->mc_index,
@@ -157,7 +157,7 @@ TRACE_EVENT(mc_event,
__entry->address,
1 << __entry->grain_bits,
__entry->syndrome,
- ((char *)__get_str(driver_detail))[0] ? " " : "",
+ __get_str(driver_detail)[0] ? " " : "",
__get_str(driver_detail))
);

diff --git a/include/trace/events/printk.h b/include/trace/events/printk.h
index c008bc99f9fa..f350170059c6 100644
--- a/include/trace/events/printk.h
+++ b/include/trace/events/printk.h
@@ -16,8 +16,16 @@ TRACE_EVENT(console,
),

TP_fast_assign(
- memcpy(__get_dynamic_array(msg), text, len);
- ((char *)__get_dynamic_array(msg))[len] = 0;
+ /*
+ * Each trace entry is printed in a new line.
+ * If the msg finishes with '\n', cut it off
+ * to avoid blank lines in the trace.
+ */
+ if ((len > 0) && (text[len-1] == '\n'))
+ len -= 1;
+
+ memcpy(__get_str(msg), text, len);
+ __get_str(msg)[len] = 0;
),

TP_printk("%s", __get_str(msg))
diff --git a/include/trace/perf.h b/include/trace/perf.h
index 88de5c205e86..04fe68bbe767 100644
--- a/include/trace/perf.h
+++ b/include/trace/perf.h
@@ -15,7 +15,7 @@
((__entry->__data_loc_##field >> 16) & 0xffff)

#undef __get_str
-#define __get_str(field) (char *)__get_dynamic_array(field)
+#define __get_str(field) ((char *)__get_dynamic_array(field))

#undef __get_bitmask
#define __get_bitmask(field) (char *)__get_dynamic_array(field)
diff --git a/include/trace/trace_events.h b/include/trace/trace_events.h
index 80679a9fae65..467e12f780d8 100644
--- a/include/trace/trace_events.h
+++ b/include/trace/trace_events.h
@@ -256,7 +256,7 @@ TRACE_MAKE_SYSTEM_STR();
((__entry->__data_loc_##field >> 16) & 0xffff)

#undef __get_str
-#define __get_str(field) (char *)__get_dynamic_array(field)
+#define __get_str(field) ((char *)__get_dynamic_array(field))

#undef __get_bitmask
#define __get_bitmask(field) \
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index fafeaf803bd0..f4b86e8ca1e7 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -542,6 +542,7 @@ config HIST_TRIGGERS
bool "Histogram triggers"
depends on ARCH_HAVE_NMI_SAFE_CMPXCHG
select TRACING_MAP
+ select TRACING
default n
help
Hist triggers allow one or more arbitrary trace event fields
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 900dbb1efff2..84752c8e28b5 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -89,16 +89,16 @@ struct ftrace_ops *function_trace_op __read_mostly = &ftrace_list_end;
/* What to set function_trace_op to */
static struct ftrace_ops *set_function_trace_op;

-/* List for set_ftrace_pid's pids. */
-LIST_HEAD(ftrace_pids);
-struct ftrace_pid {
- struct list_head list;
- struct pid *pid;
-};
-
-static bool ftrace_pids_enabled(void)
+static bool ftrace_pids_enabled(struct ftrace_ops *ops)
{
- return !list_empty(&ftrace_pids);
+ struct trace_array *tr;
+
+ if (!(ops->flags & FTRACE_OPS_FL_PID) || !ops->private)
+ return false;
+
+ tr = ops->private;
+
+ return tr->function_pids != NULL;
}

static void ftrace_update_trampoline(struct ftrace_ops *ops);
@@ -179,7 +179,9 @@ int ftrace_nr_registered_ops(void)
static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip,
struct ftrace_ops *op, struct pt_regs *regs)
{
- if (!test_tsk_trace_trace(current))
+ struct trace_array *tr = op->private;
+
+ if (tr && this_cpu_read(tr->trace_buffer.data->ftrace_ignore_pid))
return;

op->saved_func(ip, parent_ip, op, regs);
@@ -417,7 +419,7 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
/* Always save the function, and reset at unregistering */
ops->saved_func = ops->func;

- if (ops->flags & FTRACE_OPS_FL_PID && ftrace_pids_enabled())
+ if (ftrace_pids_enabled(ops))
ops->func = ftrace_pid_func;

ftrace_update_trampoline(ops);
@@ -450,7 +452,6 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)

static void ftrace_update_pid_func(void)
{
- bool enabled = ftrace_pids_enabled();
struct ftrace_ops *op;

/* Only do something if we are tracing something */
@@ -459,8 +460,8 @@ static void ftrace_update_pid_func(void)

do_for_each_ftrace_op(op, ftrace_ops_list) {
if (op->flags & FTRACE_OPS_FL_PID) {
- op->func = enabled ? ftrace_pid_func :
- op->saved_func;
+ op->func = ftrace_pids_enabled(op) ?
+ ftrace_pid_func : op->saved_func;
ftrace_update_trampoline(op);
}
} while_for_each_ftrace_op(op);
@@ -5324,179 +5325,99 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops)
return ops->func;
}

-static void clear_ftrace_swapper(void)
+static void
+ftrace_filter_pid_sched_switch_probe(void *data, bool preempt,
+ struct task_struct *prev, struct task_struct *next)
{
- struct task_struct *p;
- int cpu;
+ struct trace_array *tr = data;
+ struct trace_pid_list *pid_list;

- get_online_cpus();
- for_each_online_cpu(cpu) {
- p = idle_task(cpu);
- clear_tsk_trace_trace(p);
- }
- put_online_cpus();
-}
-
-static void set_ftrace_swapper(void)
-{
- struct task_struct *p;
- int cpu;
+ pid_list = rcu_dereference_sched(tr->function_pids);

- get_online_cpus();
- for_each_online_cpu(cpu) {
- p = idle_task(cpu);
- set_tsk_trace_trace(p);
- }
- put_online_cpus();
+ this_cpu_write(tr->trace_buffer.data->ftrace_ignore_pid,
+ trace_ignore_this_task(pid_list, next));
}

-static void clear_ftrace_pid(struct pid *pid)
+static void clear_ftrace_pids(struct trace_array *tr)
{
- struct task_struct *p;
+ struct trace_pid_list *pid_list;
+ int cpu;

- rcu_read_lock();
- do_each_pid_task(pid, PIDTYPE_PID, p) {
- clear_tsk_trace_trace(p);
- } while_each_pid_task(pid, PIDTYPE_PID, p);
- rcu_read_unlock();
+ pid_list = rcu_dereference_protected(tr->function_pids,
+ lockdep_is_held(&ftrace_lock));
+ if (!pid_list)
+ return;

- put_pid(pid);
-}
+ unregister_trace_sched_switch(ftrace_filter_pid_sched_switch_probe, tr);

-static void set_ftrace_pid(struct pid *pid)
-{
- struct task_struct *p;
+ for_each_possible_cpu(cpu)
+ per_cpu_ptr(tr->trace_buffer.data, cpu)->ftrace_ignore_pid = false;

- rcu_read_lock();
- do_each_pid_task(pid, PIDTYPE_PID, p) {
- set_tsk_trace_trace(p);
- } while_each_pid_task(pid, PIDTYPE_PID, p);
- rcu_read_unlock();
-}
+ rcu_assign_pointer(tr->function_pids, NULL);

-static void clear_ftrace_pid_task(struct pid *pid)
-{
- if (pid == ftrace_swapper_pid)
- clear_ftrace_swapper();
- else
- clear_ftrace_pid(pid);
-}
+ /* Wait till all users are no longer using pid filtering */
+ synchronize_sched();

-static void set_ftrace_pid_task(struct pid *pid)
-{
- if (pid == ftrace_swapper_pid)
- set_ftrace_swapper();
- else
- set_ftrace_pid(pid);
+ trace_free_pid_list(pid_list);
}

-static int ftrace_pid_add(int p)
+static void ftrace_pid_reset(struct trace_array *tr)
{
- struct pid *pid;
- struct ftrace_pid *fpid;
- int ret = -EINVAL;
-
mutex_lock(&ftrace_lock);
-
- if (!p)
- pid = ftrace_swapper_pid;
- else
- pid = find_get_pid(p);
-
- if (!pid)
- goto out;
-
- ret = 0;
-
- list_for_each_entry(fpid, &ftrace_pids, list)
- if (fpid->pid == pid)
- goto out_put;
-
- ret = -ENOMEM;
-
- fpid = kmalloc(sizeof(*fpid), GFP_KERNEL);
- if (!fpid)
- goto out_put;
-
- list_add(&fpid->list, &ftrace_pids);
- fpid->pid = pid;
-
- set_ftrace_pid_task(pid);
+ clear_ftrace_pids(tr);

ftrace_update_pid_func();
-
ftrace_startup_all(0);

mutex_unlock(&ftrace_lock);
- return 0;
-
-out_put:
- if (pid != ftrace_swapper_pid)
- put_pid(pid);
-
-out:
- mutex_unlock(&ftrace_lock);
- return ret;
}

-static void ftrace_pid_reset(void)
-{
- struct ftrace_pid *fpid, *safe;
-
- mutex_lock(&ftrace_lock);
- list_for_each_entry_safe(fpid, safe, &ftrace_pids, list) {
- struct pid *pid = fpid->pid;
-
- clear_ftrace_pid_task(pid);
-
- list_del(&fpid->list);
- kfree(fpid);
- }
-
- ftrace_update_pid_func();
- ftrace_startup_all(0);
-
- mutex_unlock(&ftrace_lock);
-}
+/* Greater than any max PID */
+#define FTRACE_NO_PIDS (void *)(PID_MAX_LIMIT + 1)

static void *fpid_start(struct seq_file *m, loff_t *pos)
+ __acquires(RCU)
{
+ struct trace_pid_list *pid_list;
+ struct trace_array *tr = m->private;
+
mutex_lock(&ftrace_lock);
+ rcu_read_lock_sched();

- if (!ftrace_pids_enabled() && (!*pos))
- return (void *) 1;
+ pid_list = rcu_dereference_sched(tr->function_pids);

- return seq_list_start(&ftrace_pids, *pos);
+ if (!pid_list)
+ return !(*pos) ? FTRACE_NO_PIDS : NULL;
+
+ return trace_pid_start(pid_list, pos);
}

static void *fpid_next(struct seq_file *m, void *v, loff_t *pos)
{
- if (v == (void *)1)
+ struct trace_array *tr = m->private;
+ struct trace_pid_list *pid_list = rcu_dereference_sched(tr->function_pids);
+
+ if (v == FTRACE_NO_PIDS)
return NULL;

- return seq_list_next(v, &ftrace_pids, pos);
+ return trace_pid_next(pid_list, v, pos);
}

static void fpid_stop(struct seq_file *m, void *p)
+ __releases(RCU)
{
+ rcu_read_unlock_sched();
mutex_unlock(&ftrace_lock);
}

static int fpid_show(struct seq_file *m, void *v)
{
- const struct ftrace_pid *fpid = list_entry(v, struct ftrace_pid, list);
-
- if (v == (void *)1) {
+ if (v == FTRACE_NO_PIDS) {
seq_puts(m, "no pid\n");
return 0;
}

- if (fpid->pid == ftrace_swapper_pid)
- seq_puts(m, "swapper tasks\n");
- else
- seq_printf(m, "%u\n", pid_vnr(fpid->pid));
-
- return 0;
+ return trace_pid_show(m, v);
}

static const struct seq_operations ftrace_pid_sops = {
@@ -5509,58 +5430,103 @@ static const struct seq_operations ftrace_pid_sops = {
static int
ftrace_pid_open(struct inode *inode, struct file *file)
{
+ struct trace_array *tr = inode->i_private;
+ struct seq_file *m;
int ret = 0;

+ if (trace_array_get(tr) < 0)
+ return -ENODEV;
+
if ((file->f_mode & FMODE_WRITE) &&
(file->f_flags & O_TRUNC))
- ftrace_pid_reset();
+ ftrace_pid_reset(tr);

- if (file->f_mode & FMODE_READ)
- ret = seq_open(file, &ftrace_pid_sops);
+ ret = seq_open(file, &ftrace_pid_sops);
+ if (ret < 0) {
+ trace_array_put(tr);
+ } else {
+ m = file->private_data;
+ /* copy tr over to seq ops */
+ m->private = tr;
+ }

return ret;
}

+static void ignore_task_cpu(void *data)
+{
+ struct trace_array *tr = data;
+ struct trace_pid_list *pid_list;
+
+ /*
+ * This function is called by on_each_cpu() while the
+ * event_mutex is held.
+ */
+ pid_list = rcu_dereference_protected(tr->function_pids,
+ mutex_is_locked(&ftrace_lock));
+
+ this_cpu_write(tr->trace_buffer.data->ftrace_ignore_pid,
+ trace_ignore_this_task(pid_list, current));
+}
+
static ssize_t
ftrace_pid_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
- char buf[64], *tmp;
- long val;
- int ret;
+ struct seq_file *m = filp->private_data;
+ struct trace_array *tr = m->private;
+ struct trace_pid_list *filtered_pids = NULL;
+ struct trace_pid_list *pid_list;
+ ssize_t ret;

- if (cnt >= sizeof(buf))
- return -EINVAL;
+ if (!cnt)
+ return 0;
+
+ mutex_lock(&ftrace_lock);
+
+ filtered_pids = rcu_dereference_protected(tr->function_pids,
+ lockdep_is_held(&ftrace_lock));
+
+ ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
+ if (ret < 0)
+ goto out;

- if (copy_from_user(&buf, ubuf, cnt))
- return -EFAULT;
+ rcu_assign_pointer(tr->function_pids, pid_list);

- buf[cnt] = 0;
+ if (filtered_pids) {
+ synchronize_sched();
+ trace_free_pid_list(filtered_pids);
+ } else if (pid_list) {
+ /* Register a probe to set whether to ignore the tracing of a task */
+ register_trace_sched_switch(ftrace_filter_pid_sched_switch_probe, tr);
+ }

/*
- * Allow "echo > set_ftrace_pid" or "echo -n '' > set_ftrace_pid"
- * to clean the filter quietly.
+ * Ignoring of pids is done at task switch. But we have to
+ * check for those tasks that are currently running.
+ * Always do this in case a pid was appended or removed.
*/
- tmp = strstrip(buf);
- if (strlen(tmp) == 0)
- return 1;
+ on_each_cpu(ignore_task_cpu, tr, 1);

- ret = kstrtol(tmp, 10, &val);
- if (ret < 0)
- return ret;
+ ftrace_update_pid_func();
+ ftrace_startup_all(0);
+ out:
+ mutex_unlock(&ftrace_lock);

- ret = ftrace_pid_add(val);
+ if (ret > 0)
+ *ppos += ret;

- return ret ? ret : cnt;
+ return ret;
}

static int
ftrace_pid_release(struct inode *inode, struct file *file)
{
- if (file->f_mode & FMODE_READ)
- seq_release(inode, file);
+ struct trace_array *tr = inode->i_private;

- return 0;
+ trace_array_put(tr);
+
+ return seq_release(inode, file);
}

static const struct file_operations ftrace_pid_fops = {
@@ -5571,24 +5537,21 @@ static const struct file_operations ftrace_pid_fops = {
.release = ftrace_pid_release,
};

-static __init int ftrace_init_tracefs(void)
+void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d_tracer)
{
- struct dentry *d_tracer;
+ trace_create_file("set_ftrace_pid", 0644, d_tracer,
+ tr, &ftrace_pid_fops);
+}

- d_tracer = tracing_init_dentry();
- if (IS_ERR(d_tracer))
- return 0;
+void __init ftrace_init_tracefs_toplevel(struct trace_array *tr,
+ struct dentry *d_tracer)
+{
+ /* Only the top level directory has the dyn_tracefs and profile */
+ WARN_ON(!(tr->flags & TRACE_ARRAY_FL_GLOBAL));

ftrace_init_dyn_tracefs(d_tracer);
-
- trace_create_file("set_ftrace_pid", 0644, d_tracer,
- NULL, &ftrace_pid_fops);
-
ftrace_profile_tracefs(d_tracer);
-
- return 0;
}
-fs_initcall(ftrace_init_tracefs);

/**
* ftrace_kill - kill ftrace
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8a4bd6b68a0b..dade4c9559cc 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -25,7 +25,7 @@
#include <linux/hardirq.h>
#include <linux/linkage.h>
#include <linux/uaccess.h>
-#include <linux/kprobes.h>
+#include <linux/vmalloc.h>
#include <linux/ftrace.h>
#include <linux/module.h>
#include <linux/percpu.h>
@@ -319,6 +319,258 @@ int call_filter_check_discard(struct trace_event_call *call, void *rec,
return 0;
}

+void trace_free_pid_list(struct trace_pid_list *pid_list)
+{
+ vfree(pid_list->pids);
+ kfree(pid_list);
+}
+
+/**
+ * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
+ * @filtered_pids: The list of pids to check
+ * @search_pid: The PID to find in @filtered_pids
+ *
+ * Returns true if @search_pid is fonud in @filtered_pids, and false otherwis.
+ */
+bool
+trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
+{
+ /*
+ * If pid_max changed after filtered_pids was created, we
+ * by default ignore all pids greater than the previous pid_max.
+ */
+ if (search_pid >= filtered_pids->pid_max)
+ return false;
+
+ return test_bit(search_pid, filtered_pids->pids);
+}
+
+/**
+ * trace_ignore_this_task - should a task be ignored for tracing
+ * @filtered_pids: The list of pids to check
+ * @task: The task that should be ignored if not filtered
+ *
+ * Checks if @task should be traced or not from @filtered_pids.
+ * Returns true if @task should *NOT* be traced.
+ * Returns false if @task should be traced.
+ */
+bool
+trace_ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
+{
+ /*
+ * Return false, because if filtered_pids does not exist,
+ * all pids are good to trace.
+ */
+ if (!filtered_pids)
+ return false;
+
+ return !trace_find_filtered_pid(filtered_pids, task->pid);
+}
+
+/**
+ * trace_pid_filter_add_remove - Add or remove a task from a pid_list
+ * @pid_list: The list to modify
+ * @self: The current task for fork or NULL for exit
+ * @task: The task to add or remove
+ *
+ * If adding a task, if @self is defined, the task is only added if @self
+ * is also included in @pid_list. This happens on fork and tasks should
+ * only be added when the parent is listed. If @self is NULL, then the
+ * @task pid will be removed from the list, which would happen on exit
+ * of a task.
+ */
+void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
+ struct task_struct *self,
+ struct task_struct *task)
+{
+ if (!pid_list)
+ return;
+
+ /* For forks, we only add if the forking task is listed */
+ if (self) {
+ if (!trace_find_filtered_pid(pid_list, self->pid))
+ return;
+ }
+
+ /* Sorry, but we don't support pid_max changing after setting */
+ if (task->pid >= pid_list->pid_max)
+ return;
+
+ /* "self" is set for forks, and NULL for exits */
+ if (self)
+ set_bit(task->pid, pid_list->pids);
+ else
+ clear_bit(task->pid, pid_list->pids);
+}
+
+/**
+ * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
+ * @pid_list: The pid list to show
+ * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
+ * @pos: The position of the file
+ *
+ * This is used by the seq_file "next" operation to iterate the pids
+ * listed in a trace_pid_list structure.
+ *
+ * Returns the pid+1 as we want to display pid of zero, but NULL would
+ * stop the iteration.
+ */
+void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
+{
+ unsigned long pid = (unsigned long)v;
+
+ (*pos)++;
+
+ /* pid already is +1 of the actual prevous bit */
+ pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);
+
+ /* Return pid + 1 to allow zero to be represented */
+ if (pid < pid_list->pid_max)
+ return (void *)(pid + 1);
+
+ return NULL;
+}
+
+/**
+ * trace_pid_start - Used for seq_file to start reading pid lists
+ * @pid_list: The pid list to show
+ * @pos: The position of the file
+ *
+ * This is used by seq_file "start" operation to start the iteration
+ * of listing pids.
+ *
+ * Returns the pid+1 as we want to display pid of zero, but NULL would
+ * stop the iteration.
+ */
+void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
+{
+ unsigned long pid;
+ loff_t l = 0;
+
+ pid = find_first_bit(pid_list->pids, pid_list->pid_max);
+ if (pid >= pid_list->pid_max)
+ return NULL;
+
+ /* Return pid + 1 so that zero can be the exit value */
+ for (pid++; pid && l < *pos;
+ pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
+ ;
+ return (void *)pid;
+}
+
+/**
+ * trace_pid_show - show the current pid in seq_file processing
+ * @m: The seq_file structure to write into
+ * @v: A void pointer of the pid (+1) value to display
+ *
+ * Can be directly used by seq_file operations to display the current
+ * pid value.
+ */
+int trace_pid_show(struct seq_file *m, void *v)
+{
+ unsigned long pid = (unsigned long)v - 1;
+
+ seq_printf(m, "%lu\n", pid);
+ return 0;
+}
+
+/* 128 should be much more than enough */
+#define PID_BUF_SIZE 127
+
+int trace_pid_write(struct trace_pid_list *filtered_pids,
+ struct trace_pid_list **new_pid_list,
+ const char __user *ubuf, size_t cnt)
+{
+ struct trace_pid_list *pid_list;
+ struct trace_parser parser;
+ unsigned long val;
+ int nr_pids = 0;
+ ssize_t read = 0;
+ ssize_t ret = 0;
+ loff_t pos;
+ pid_t pid;
+
+ if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
+ return -ENOMEM;
+
+ /*
+ * Always recreate a new array. The write is an all or nothing
+ * operation. Always create a new array when adding new pids by
+ * the user. If the operation fails, then the current list is
+ * not modified.
+ */
+ pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
+ if (!pid_list)
+ return -ENOMEM;
+
+ pid_list->pid_max = READ_ONCE(pid_max);
+
+ /* Only truncating will shrink pid_max */
+ if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
+ pid_list->pid_max = filtered_pids->pid_max;
+
+ pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
+ if (!pid_list->pids) {
+ kfree(pid_list);
+ return -ENOMEM;
+ }
+
+ if (filtered_pids) {
+ /* copy the current bits to the new max */
+ for_each_set_bit(pid, filtered_pids->pids,
+ filtered_pids->pid_max) {
+ set_bit(pid, pid_list->pids);
+ nr_pids++;
+ }
+ }
+
+ while (cnt > 0) {
+
+ pos = 0;
+
+ ret = trace_get_user(&parser, ubuf, cnt, &pos);
+ if (ret < 0 || !trace_parser_loaded(&parser))
+ break;
+
+ read += ret;
+ ubuf += ret;
+ cnt -= ret;
+
+ parser.buffer[parser.idx] = 0;
+
+ ret = -EINVAL;
+ if (kstrtoul(parser.buffer, 0, &val))
+ break;
+ if (val >= pid_list->pid_max)
+ break;
+
+ pid = (pid_t)val;
+
+ set_bit(pid, pid_list->pids);
+ nr_pids++;
+
+ trace_parser_clear(&parser);
+ ret = 0;
+ }
+ trace_parser_put(&parser);
+
+ if (ret < 0) {
+ trace_free_pid_list(pid_list);
+ return ret;
+ }
+
+ if (!nr_pids) {
+ /* Cleared the list of pids */
+ trace_free_pid_list(pid_list);
+ read = ret;
+ pid_list = NULL;
+ }
+
+ *new_pid_list = pid_list;
+
+ return read;
+}
+
static cycle_t buffer_ftrace_now(struct trace_buffer *buf, int cpu)
{
u64 ts;
@@ -1862,7 +2114,17 @@ void trace_buffer_unlock_commit_regs(struct trace_array *tr,
{
__buffer_unlock_commit(buffer, event);

- ftrace_trace_stack(tr, buffer, flags, 0, pc, regs);
+ /*
+ * If regs is not set, then skip the following callers:
+ * trace_buffer_unlock_commit_regs
+ * event_trigger_unlock_commit
+ * trace_event_buffer_commit
+ * trace_event_raw_event_sched_switch
+ * Note, we can still get here via blktrace, wakeup tracer
+ * and mmiotrace, but that's ok if they lose a function or
+ * two. They are that meaningful.
+ */
+ ftrace_trace_stack(tr, buffer, flags, regs ? 0 : 4, pc, regs);
ftrace_trace_userstack(buffer, flags, pc);
}

@@ -1913,6 +2175,13 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
trace.skip = skip;

/*
+ * Add two, for this function and the call to save_stack_trace()
+ * If regs is set, then these functions will not be in the way.
+ */
+ if (!regs)
+ trace.skip += 2;
+
+ /*
* Since events can happen in NMIs there's no safe way to
* use the per cpu ftrace_stacks. We reserve it and if an interrupt
* or NMI comes in, it will just have to use the default
@@ -2083,83 +2352,41 @@ static void __trace_userstack(struct trace_array *tr, unsigned long flags)

/* created for use with alloc_percpu */
struct trace_buffer_struct {
- char buffer[TRACE_BUF_SIZE];
+ int nesting;
+ char buffer[4][TRACE_BUF_SIZE];
};

static struct trace_buffer_struct *trace_percpu_buffer;
-static struct trace_buffer_struct *trace_percpu_sirq_buffer;
-static struct trace_buffer_struct *trace_percpu_irq_buffer;
-static struct trace_buffer_struct *trace_percpu_nmi_buffer;

/*
- * The buffer used is dependent on the context. There is a per cpu
- * buffer for normal context, softirq contex, hard irq context and
- * for NMI context. Thise allows for lockless recording.
- *
- * Note, if the buffers failed to be allocated, then this returns NULL
+ * Thise allows for lockless recording. If we're nested too deeply, then
+ * this returns NULL.
*/
static char *get_trace_buf(void)
{
- struct trace_buffer_struct *percpu_buffer;
-
- /*
- * If we have allocated per cpu buffers, then we do not
- * need to do any locking.
- */
- if (in_nmi())
- percpu_buffer = trace_percpu_nmi_buffer;
- else if (in_irq())
- percpu_buffer = trace_percpu_irq_buffer;
- else if (in_softirq())
- percpu_buffer = trace_percpu_sirq_buffer;
- else
- percpu_buffer = trace_percpu_buffer;
+ struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);

- if (!percpu_buffer)
+ if (!buffer || buffer->nesting >= 4)
return NULL;

- return this_cpu_ptr(&percpu_buffer->buffer[0]);
+ return &buffer->buffer[buffer->nesting++][0];
+}
+
+static void put_trace_buf(void)
+{
+ this_cpu_dec(trace_percpu_buffer->nesting);
}

static int alloc_percpu_trace_buffer(void)
{
struct trace_buffer_struct *buffers;
- struct trace_buffer_struct *sirq_buffers;
- struct trace_buffer_struct *irq_buffers;
- struct trace_buffer_struct *nmi_buffers;

buffers = alloc_percpu(struct trace_buffer_struct);
- if (!buffers)
- goto err_warn;
-
- sirq_buffers = alloc_percpu(struct trace_buffer_struct);
- if (!sirq_buffers)
- goto err_sirq;
-
- irq_buffers = alloc_percpu(struct trace_buffer_struct);
- if (!irq_buffers)
- goto err_irq;
-
- nmi_buffers = alloc_percpu(struct trace_buffer_struct);
- if (!nmi_buffers)
- goto err_nmi;
+ if (WARN(!buffers, "Could not allocate percpu trace_printk buffer"))
+ return -ENOMEM;

trace_percpu_buffer = buffers;
- trace_percpu_sirq_buffer = sirq_buffers;
- trace_percpu_irq_buffer = irq_buffers;
- trace_percpu_nmi_buffer = nmi_buffers;
-
return 0;
-
- err_nmi:
- free_percpu(irq_buffers);
- err_irq:
- free_percpu(sirq_buffers);
- err_sirq:
- free_percpu(buffers);
- err_warn:
- WARN(1, "Could not allocate percpu trace_printk buffer");
- return -ENOMEM;
}

static int buffers_allocated;
@@ -2250,7 +2477,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
tbuffer = get_trace_buf();
if (!tbuffer) {
len = 0;
- goto out;
+ goto out_nobuffer;
}

len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
@@ -2276,6 +2503,9 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
}

out:
+ put_trace_buf();
+
+out_nobuffer:
preempt_enable_notrace();
unpause_graph_tracing();

@@ -2307,7 +2537,7 @@ __trace_array_vprintk(struct ring_buffer *buffer,
tbuffer = get_trace_buf();
if (!tbuffer) {
len = 0;
- goto out;
+ goto out_nobuffer;
}

len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
@@ -2326,7 +2556,11 @@ __trace_array_vprintk(struct ring_buffer *buffer,
__buffer_unlock_commit(buffer, event);
ftrace_trace_stack(&global_trace, buffer, flags, 6, pc, NULL);
}
- out:
+
+out:
+ put_trace_buf();
+
+out_nobuffer:
preempt_enable_notrace();
unpause_graph_tracing();

@@ -6977,6 +7211,7 @@ init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
for_each_tracing_cpu(cpu)
tracing_init_tracefs_percpu(tr, cpu);

+ ftrace_init_tracefs(tr, d_tracer);
}

static struct vfsmount *trace_automount(void *ingore)
@@ -7130,6 +7365,7 @@ static __init int tracer_init_tracefs(void)
return 0;

init_tracer_tracefs(&global_trace, d_tracer);
+ ftrace_init_tracefs_toplevel(&global_trace, d_tracer);

trace_create_file("tracing_thresh", 0644, d_tracer,
&global_trace, &tracing_thresh_fops);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 5167c366d6b7..f783df416726 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -80,6 +80,12 @@ enum trace_type {
FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \
filter)

+#undef FTRACE_ENTRY_PACKED
+#define FTRACE_ENTRY_PACKED(name, struct_name, id, tstruct, print, \
+ filter) \
+ FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \
+ filter) __packed
+
#include "trace_entries.h"

/*
@@ -156,6 +162,9 @@ struct trace_array_cpu {
char comm[TASK_COMM_LEN];

bool ignore_pid;
+#ifdef CONFIG_FUNCTION_TRACER
+ bool ftrace_ignore_pid;
+#endif
};

struct tracer;
@@ -247,6 +256,7 @@ struct trace_array {
int ref;
#ifdef CONFIG_FUNCTION_TRACER
struct ftrace_ops *ops;
+ struct trace_pid_list __rcu *function_pids;
/* function tracing enabled */
int function_enabled;
#endif
@@ -628,6 +638,25 @@ extern unsigned long nsecs_to_usecs(unsigned long nsecs);

extern unsigned long tracing_thresh;

+/* PID filtering */
+
+extern int pid_max;
+
+bool trace_find_filtered_pid(struct trace_pid_list *filtered_pids,
+ pid_t search_pid);
+bool trace_ignore_this_task(struct trace_pid_list *filtered_pids,
+ struct task_struct *task);
+void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
+ struct task_struct *self,
+ struct task_struct *task);
+void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos);
+void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos);
+int trace_pid_show(struct seq_file *m, void *v);
+void trace_free_pid_list(struct trace_pid_list *pid_list);
+int trace_pid_write(struct trace_pid_list *filtered_pids,
+ struct trace_pid_list **new_pid_list,
+ const char __user *ubuf, size_t cnt);
+
#ifdef CONFIG_TRACER_MAX_TRACE
void update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu);
void update_max_tr_single(struct trace_array *tr,
@@ -821,12 +850,9 @@ extern struct list_head ftrace_pids;

#ifdef CONFIG_FUNCTION_TRACER
extern bool ftrace_filter_param __initdata;
-static inline int ftrace_trace_task(struct task_struct *task)
+static inline int ftrace_trace_task(struct trace_array *tr)
{
- if (list_empty(&ftrace_pids))
- return 1;
-
- return test_tsk_trace_trace(task);
+ return !this_cpu_read(tr->trace_buffer.data->ftrace_ignore_pid);
}
extern int ftrace_is_dead(void);
int ftrace_create_function_files(struct trace_array *tr,
@@ -836,8 +862,11 @@ void ftrace_init_global_array_ops(struct trace_array *tr);
void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func);
void ftrace_reset_array_ops(struct trace_array *tr);
int using_ftrace_ops_list_func(void);
+void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d_tracer);
+void ftrace_init_tracefs_toplevel(struct trace_array *tr,
+ struct dentry *d_tracer);
#else
-static inline int ftrace_trace_task(struct task_struct *task)
+static inline int ftrace_trace_task(struct trace_array *tr)
{
return 1;
}
@@ -852,6 +881,8 @@ static inline void ftrace_destroy_function_files(struct trace_array *tr) { }
static inline __init void
ftrace_init_global_array_ops(struct trace_array *tr) { }
static inline void ftrace_reset_array_ops(struct trace_array *tr) { }
+static inline void ftrace_init_tracefs(struct trace_array *tr, struct dentry *d) { }
+static inline void ftrace_init_tracefs_toplevel(struct trace_array *tr, struct dentry *d) { }
/* ftace_func_t type is not defined, use macro instead of static inline */
#define ftrace_init_array_ops(tr, func) do { } while (0)
#endif /* CONFIG_FUNCTION_TRACER */
@@ -1600,6 +1631,11 @@ int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled);
#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print, filter) \
FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print), \
filter)
+#undef FTRACE_ENTRY_PACKED
+#define FTRACE_ENTRY_PACKED(call, struct_name, id, tstruct, print, filter) \
+ FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print), \
+ filter)
+
#include "trace_entries.h"

#if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_FUNCTION_TRACER)
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index ee7b94a4810a..5c30efcda5e6 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -72,7 +72,7 @@ FTRACE_ENTRY_REG(function, ftrace_entry,
);

/* Function call entry */
-FTRACE_ENTRY(funcgraph_entry, ftrace_graph_ent_entry,
+FTRACE_ENTRY_PACKED(funcgraph_entry, ftrace_graph_ent_entry,

TRACE_GRAPH_ENT,

@@ -88,7 +88,7 @@ FTRACE_ENTRY(funcgraph_entry, ftrace_graph_ent_entry,
);

/* Function return entry */
-FTRACE_ENTRY(funcgraph_exit, ftrace_graph_ret_entry,
+FTRACE_ENTRY_PACKED(funcgraph_exit, ftrace_graph_ret_entry,

TRACE_GRAPH_RET,

diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 3d4155892a1e..03c0a48c3ac4 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -15,7 +15,6 @@
#include <linux/kthread.h>
#include <linux/tracefs.h>
#include <linux/uaccess.h>
-#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/ctype.h>
#include <linux/sort.h>
@@ -262,6 +261,14 @@ void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer,

local_save_flags(fbuffer->flags);
fbuffer->pc = preempt_count();
+ /*
+ * If CONFIG_PREEMPT is enabled, then the tracepoint itself disables
+ * preemption (adding one to the preempt_count). Since we are
+ * interested in the preempt_count at the time the tracepoint was
+ * hit, we need to subtract one to offset the increment.
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT))
+ fbuffer->pc--;
fbuffer->trace_file = trace_file;

fbuffer->event =
@@ -499,60 +506,6 @@ static void ftrace_clear_events(struct trace_array *tr)
mutex_unlock(&event_mutex);
}

-/* Shouldn't this be in a header? */
-extern int pid_max;
-
-/* Returns true if found in filter */
-static bool
-find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
-{
- /*
- * If pid_max changed after filtered_pids was created, we
- * by default ignore all pids greater than the previous pid_max.
- */
- if (search_pid >= filtered_pids->pid_max)
- return false;
-
- return test_bit(search_pid, filtered_pids->pids);
-}
-
-static bool
-ignore_this_task(struct trace_pid_list *filtered_pids, struct task_struct *task)
-{
- /*
- * Return false, because if filtered_pids does not exist,
- * all pids are good to trace.
- */
- if (!filtered_pids)
- return false;
-
- return !find_filtered_pid(filtered_pids, task->pid);
-}
-
-static void filter_add_remove_task(struct trace_pid_list *pid_list,
- struct task_struct *self,
- struct task_struct *task)
-{
- if (!pid_list)
- return;
-
- /* For forks, we only add if the forking task is listed */
- if (self) {
- if (!find_filtered_pid(pid_list, self->pid))
- return;
- }
-
- /* Sorry, but we don't support pid_max changing after setting */
- if (task->pid >= pid_list->pid_max)
- return;
-
- /* "self" is set for forks, and NULL for exits */
- if (self)
- set_bit(task->pid, pid_list->pids);
- else
- clear_bit(task->pid, pid_list->pids);
-}
-
static void
event_filter_pid_sched_process_exit(void *data, struct task_struct *task)
{
@@ -560,7 +513,7 @@ event_filter_pid_sched_process_exit(void *data, struct task_struct *task)
struct trace_array *tr = data;

pid_list = rcu_dereference_sched(tr->filtered_pids);
- filter_add_remove_task(pid_list, NULL, task);
+ trace_filter_add_remove_task(pid_list, NULL, task);
}

static void
@@ -572,7 +525,7 @@ event_filter_pid_sched_process_fork(void *data,
struct trace_array *tr = data;

pid_list = rcu_dereference_sched(tr->filtered_pids);
- filter_add_remove_task(pid_list, self, task);
+ trace_filter_add_remove_task(pid_list, self, task);
}

void trace_event_follow_fork(struct trace_array *tr, bool enable)
@@ -600,8 +553,8 @@ event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
pid_list = rcu_dereference_sched(tr->filtered_pids);

this_cpu_write(tr->trace_buffer.data->ignore_pid,
- ignore_this_task(pid_list, prev) &&
- ignore_this_task(pid_list, next));
+ trace_ignore_this_task(pid_list, prev) &&
+ trace_ignore_this_task(pid_list, next));
}

static void
@@ -614,7 +567,7 @@ event_filter_pid_sched_switch_probe_post(void *data, bool preempt,
pid_list = rcu_dereference_sched(tr->filtered_pids);

this_cpu_write(tr->trace_buffer.data->ignore_pid,
- ignore_this_task(pid_list, next));
+ trace_ignore_this_task(pid_list, next));
}

static void
@@ -630,7 +583,7 @@ event_filter_pid_sched_wakeup_probe_pre(void *data, struct task_struct *task)
pid_list = rcu_dereference_sched(tr->filtered_pids);

this_cpu_write(tr->trace_buffer.data->ignore_pid,
- ignore_this_task(pid_list, task));
+ trace_ignore_this_task(pid_list, task));
}

static void
@@ -647,7 +600,7 @@ event_filter_pid_sched_wakeup_probe_post(void *data, struct task_struct *task)

/* Set tracing if current is enabled */
this_cpu_write(tr->trace_buffer.data->ignore_pid,
- ignore_this_task(pid_list, current));
+ trace_ignore_this_task(pid_list, current));
}

static void __ftrace_clear_event_pids(struct trace_array *tr)
@@ -685,8 +638,7 @@ static void __ftrace_clear_event_pids(struct trace_array *tr)
/* Wait till all users are no longer using pid filtering */
synchronize_sched();

- vfree(pid_list->pids);
- kfree(pid_list);
+ trace_free_pid_list(pid_list);
}

static void ftrace_clear_event_pids(struct trace_array *tr)
@@ -1034,18 +986,8 @@ p_next(struct seq_file *m, void *v, loff_t *pos)
{
struct trace_array *tr = m->private;
struct trace_pid_list *pid_list = rcu_dereference_sched(tr->filtered_pids);
- unsigned long pid = (unsigned long)v;
-
- (*pos)++;
-
- /* pid already is +1 of the actual prevous bit */
- pid = find_next_bit(pid_list->pids, pid_list->pid_max, pid);

- /* Return pid + 1 to allow zero to be represented */
- if (pid < pid_list->pid_max)
- return (void *)(pid + 1);
-
- return NULL;
+ return trace_pid_next(pid_list, v, pos);
}

static void *p_start(struct seq_file *m, loff_t *pos)
@@ -1053,8 +995,6 @@ static void *p_start(struct seq_file *m, loff_t *pos)
{
struct trace_pid_list *pid_list;
struct trace_array *tr = m->private;
- unsigned long pid;
- loff_t l = 0;

/*
* Grab the mutex, to keep calls to p_next() having the same
@@ -1070,15 +1010,7 @@ static void *p_start(struct seq_file *m, loff_t *pos)
if (!pid_list)
return NULL;

- pid = find_first_bit(pid_list->pids, pid_list->pid_max);
- if (pid >= pid_list->pid_max)
- return NULL;
-
- /* Return pid + 1 so that zero can be the exit value */
- for (pid++; pid && l < *pos;
- pid = (unsigned long)p_next(m, (void *)pid, &l))
- ;
- return (void *)pid;
+ return trace_pid_start(pid_list, pos);
}

static void p_stop(struct seq_file *m, void *p)
@@ -1088,14 +1020,6 @@ static void p_stop(struct seq_file *m, void *p)
mutex_unlock(&event_mutex);
}

-static int p_show(struct seq_file *m, void *v)
-{
- unsigned long pid = (unsigned long)v - 1;
-
- seq_printf(m, "%lu\n", pid);
- return 0;
-}
-
static ssize_t
event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
loff_t *ppos)
@@ -1654,7 +1578,7 @@ static void ignore_task_cpu(void *data)
mutex_is_locked(&event_mutex));

this_cpu_write(tr->trace_buffer.data->ignore_pid,
- ignore_this_task(pid_list, current));
+ trace_ignore_this_task(pid_list, current));
}

static ssize_t
@@ -1666,13 +1590,7 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
struct trace_pid_list *filtered_pids = NULL;
struct trace_pid_list *pid_list;
struct trace_event_file *file;
- struct trace_parser parser;
- unsigned long val;
- loff_t this_pos;
- ssize_t read = 0;
- ssize_t ret = 0;
- pid_t pid;
- int nr_pids = 0;
+ ssize_t ret;

if (!cnt)
return 0;
@@ -1681,93 +1599,15 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
if (ret < 0)
return ret;

- if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1))
- return -ENOMEM;
-
mutex_lock(&event_mutex);
+
filtered_pids = rcu_dereference_protected(tr->filtered_pids,
lockdep_is_held(&event_mutex));

- /*
- * Always recreate a new array. The write is an all or nothing
- * operation. Always create a new array when adding new pids by
- * the user. If the operation fails, then the current list is
- * not modified.
- */
- pid_list = kmalloc(sizeof(*pid_list), GFP_KERNEL);
- if (!pid_list) {
- read = -ENOMEM;
- goto out;
- }
- pid_list->pid_max = READ_ONCE(pid_max);
- /* Only truncating will shrink pid_max */
- if (filtered_pids && filtered_pids->pid_max > pid_list->pid_max)
- pid_list->pid_max = filtered_pids->pid_max;
- pid_list->pids = vzalloc((pid_list->pid_max + 7) >> 3);
- if (!pid_list->pids) {
- kfree(pid_list);
- read = -ENOMEM;
- goto out;
- }
- if (filtered_pids) {
- /* copy the current bits to the new max */
- pid = find_first_bit(filtered_pids->pids,
- filtered_pids->pid_max);
- while (pid < filtered_pids->pid_max) {
- set_bit(pid, pid_list->pids);
- pid = find_next_bit(filtered_pids->pids,
- filtered_pids->pid_max,
- pid + 1);
- nr_pids++;
- }
- }
-
- while (cnt > 0) {
-
- this_pos = 0;
-
- ret = trace_get_user(&parser, ubuf, cnt, &this_pos);
- if (ret < 0 || !trace_parser_loaded(&parser))
- break;
-
- read += ret;
- ubuf += ret;
- cnt -= ret;
-
- parser.buffer[parser.idx] = 0;
-
- ret = -EINVAL;
- if (kstrtoul(parser.buffer, 0, &val))
- break;
- if (val >= pid_list->pid_max)
- break;
-
- pid = (pid_t)val;
-
- set_bit(pid, pid_list->pids);
- nr_pids++;
-
- trace_parser_clear(&parser);
- ret = 0;
- }
- trace_parser_put(&parser);
-
- if (ret < 0) {
- vfree(pid_list->pids);
- kfree(pid_list);
- read = ret;
+ ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
+ if (ret < 0)
goto out;
- }

- if (!nr_pids) {
- /* Cleared the list of pids */
- vfree(pid_list->pids);
- kfree(pid_list);
- read = ret;
- if (!filtered_pids)
- goto out;
- pid_list = NULL;
- }
rcu_assign_pointer(tr->filtered_pids, pid_list);

list_for_each_entry(file, &tr->events, list) {
@@ -1776,10 +1616,8 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,

if (filtered_pids) {
synchronize_sched();
-
- vfree(filtered_pids->pids);
- kfree(filtered_pids);
- } else {
+ trace_free_pid_list(filtered_pids);
+ } else if (pid_list) {
/*
* Register a probe that is called before all other probes
* to set ignore_pid if next or prev do not match.
@@ -1817,9 +1655,8 @@ ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
out:
mutex_unlock(&event_mutex);

- ret = read;
- if (read > 0)
- *ppos += read;
+ if (ret > 0)
+ *ppos += ret;

return ret;
}
@@ -1846,7 +1683,7 @@ static const struct seq_operations show_set_event_seq_ops = {
static const struct seq_operations show_set_pid_seq_ops = {
.start = p_start,
.next = p_next,
- .show = p_show,
+ .show = trace_pid_show,
.stop = p_stop,
};

diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 5a095c2e4b69..0efa00d80623 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -43,7 +43,7 @@ static int allocate_ftrace_ops(struct trace_array *tr)

/* Currently only the non stack verision is supported */
ops->func = function_trace_call;
- ops->flags = FTRACE_OPS_FL_RECURSION_SAFE;
+ ops->flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_PID;

tr->ops = ops;
ops->private = tr;
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 3a0244ff7ea8..7363ccf79512 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -319,7 +319,7 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
int cpu;
int pc;

- if (!ftrace_trace_task(current))
+ if (!ftrace_trace_task(tr))
return 0;

/* trace it when it is-nested-in or is a function enabled. */
@@ -338,6 +338,13 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
if (ftrace_graph_notrace_addr(trace->func))
return 1;

+ /*
+ * Stop here if tracing_threshold is set. We only write function return
+ * events to the ring buffer.
+ */
+ if (tracing_thresh)
+ return 1;
+
local_irq_save(flags);
cpu = raw_smp_processor_id();
data = per_cpu_ptr(tr->trace_buffer.data, cpu);
@@ -355,14 +362,6 @@ int trace_graph_entry(struct ftrace_graph_ent *trace)
return ret;
}

-static int trace_graph_thresh_entry(struct ftrace_graph_ent *trace)
-{
- if (tracing_thresh)
- return 1;
- else
- return trace_graph_entry(trace);
-}
-
static void
__trace_graph_function(struct trace_array *tr,
unsigned long ip, unsigned long flags, int pc)
@@ -457,7 +456,7 @@ static int graph_trace_init(struct trace_array *tr)
set_graph_array(tr);
if (tracing_thresh)
ret = register_ftrace_graph(&trace_graph_thresh_return,
- &trace_graph_thresh_entry);
+ &trace_graph_entry);
else
ret = register_ftrace_graph(&trace_graph_return,
&trace_graph_entry);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 5546eec0505f..9aedb0b06683 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -587,6 +587,7 @@ static int create_trace_kprobe(int argc, char **argv)
* $retval : fetch return value
* $stack : fetch stack address
* $stackN : fetch Nth of stack (N:0-)
+ * $comm : fetch current task comm
* @ADDR : fetch memory at ADDR (ADDR should be in kernel)
* @SYM[+|-offs] : fetch memory at SYM +|- offs (SYM is a data symbol)
* %REG : fetch register REG
diff --git a/kernel/trace/trace_mmiotrace.c b/kernel/trace/trace_mmiotrace.c
index 68f376ca6d3f..cd7480d0a201 100644
--- a/kernel/trace/trace_mmiotrace.c
+++ b/kernel/trace/trace_mmiotrace.c
@@ -68,19 +68,15 @@ static void mmio_print_pcidev(struct trace_seq *s, const struct pci_dev *dev)
trace_seq_printf(s, "PCIDEV %02x%02x %04x%04x %x",
dev->bus->number, dev->devfn,
dev->vendor, dev->device, dev->irq);
- /*
- * XXX: is pci_resource_to_user() appropriate, since we are
- * supposed to interpret the __ioremap() phys_addr argument based on
- * these printed values?
- */
for (i = 0; i < 7; i++) {
- pci_resource_to_user(dev, i, &dev->resource[i], &start, &end);
+ start = dev->resource[i].start;
trace_seq_printf(s, " %llx",
(unsigned long long)(start |
(dev->resource[i].flags & PCI_REGION_FLAG_MASK)));
}
for (i = 0; i < 7; i++) {
- pci_resource_to_user(dev, i, &dev->resource[i], &start, &end);
+ start = dev->resource[i].start;
+ end = dev->resource[i].end;
trace_seq_printf(s, " %llx",
dev->resource[i].start < dev->resource[i].end ?
(unsigned long long)(end - start) + 1 : 0);
diff --git a/kernel/trace/trace_probe.c b/kernel/trace/trace_probe.c
index 1d372fa6fefb..74e80a582c28 100644
--- a/kernel/trace/trace_probe.c
+++ b/kernel/trace/trace_probe.c
@@ -218,6 +218,28 @@ free_bitfield_fetch_param(struct bitfield_fetch_param *data)
kfree(data);
}

+void FETCH_FUNC_NAME(comm, string)(struct pt_regs *regs,
+ void *data, void *dest)
+{
+ int maxlen = get_rloc_len(*(u32 *)dest);
+ u8 *dst = get_rloc_data(dest);
+ long ret;
+
+ if (!maxlen)
+ return;
+
+ ret = strlcpy(dst, current->comm, maxlen);
+ *(u32 *)dest = make_data_rloc(ret, get_rloc_offs(*(u32 *)dest));
+}
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(comm, string));
+
+void FETCH_FUNC_NAME(comm, string_size)(struct pt_regs *regs,
+ void *data, void *dest)
+{
+ *(u32 *)dest = strlen(current->comm) + 1;
+}
+NOKPROBE_SYMBOL(FETCH_FUNC_NAME(comm, string_size));
+
static const struct fetch_type *find_fetch_type(const char *type,
const struct fetch_type *ftbl)
{
@@ -348,6 +370,11 @@ static int parse_probe_vars(char *arg, const struct fetch_type *t,
}
} else
ret = -EINVAL;
+ } else if (strcmp(arg, "comm") == 0) {
+ if (strcmp(t->name, "string") != 0 &&
+ strcmp(t->name, "string_size") != 0)
+ return -EINVAL;
+ f->fn = t->fetch[FETCH_MTD_comm];
} else
ret = -EINVAL;

@@ -522,6 +549,12 @@ int traceprobe_parse_probe_arg(char *arg, ssize_t *size,
arg[t - parg->comm] = '\0';
t++;
}
+ /*
+ * The default type of $comm should be "string", and it can't be
+ * dereferenced.
+ */
+ if (!t && strcmp(arg, "$comm") == 0)
+ t = "string";
parg->type = find_fetch_type(t, ftbl);
if (!parg->type) {
pr_info("Unsupported type: %s\n", t);
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index f6398db09114..45400ca5ded1 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -102,6 +102,7 @@ enum {
FETCH_MTD_reg = 0,
FETCH_MTD_stack,
FETCH_MTD_retval,
+ FETCH_MTD_comm,
FETCH_MTD_memory,
FETCH_MTD_symbol,
FETCH_MTD_deref,
@@ -183,6 +184,14 @@ DECLARE_BASIC_FETCH_FUNCS(bitfield);
#define fetch_bitfield_string NULL
#define fetch_bitfield_string_size NULL

+/* comm only makes sense as a string */
+#define fetch_comm_u8 NULL
+#define fetch_comm_u16 NULL
+#define fetch_comm_u32 NULL
+#define fetch_comm_u64 NULL
+DECLARE_FETCH_FUNC(comm, string);
+DECLARE_FETCH_FUNC(comm, string_size);
+
/*
* Define macro for basic types - we don't need to define s* types, because
* we have to care only about bitwidth at recording time.
@@ -213,6 +222,7 @@ DEFINE_FETCH_##method(u64)
ASSIGN_FETCH_FUNC(reg, ftype), \
ASSIGN_FETCH_FUNC(stack, ftype), \
ASSIGN_FETCH_FUNC(retval, ftype), \
+ASSIGN_FETCH_FUNC(comm, ftype), \
ASSIGN_FETCH_FUNC(memory, ftype), \
ASSIGN_FETCH_FUNC(symbol, ftype), \
ASSIGN_FETCH_FUNC(deref, ftype), \
diff --git a/samples/Kconfig b/samples/Kconfig
index 559a58baff6e..27a24571e96c 100644
--- a/samples/Kconfig
+++ b/samples/Kconfig
@@ -11,6 +11,13 @@ config SAMPLE_TRACE_EVENTS
help
This build trace event example modules.

+config SAMPLE_TRACE_PRINTK
+ tristate "Build trace_printk module - tests various trace_printk formats"
+ depends on EVENT_TRACING && m
+ help
+ This builds a module that calls trace_printk() and can be used to
+ test various trace_printk() calls from a module.
+
config SAMPLE_KOBJECT
tristate "Build kobject examples -- loadable modules only"
depends on m
diff --git a/samples/Makefile b/samples/Makefile
index 2e3b523d7097..1a20169d85ac 100644
--- a/samples/Makefile
+++ b/samples/Makefile
@@ -2,4 +2,4 @@

obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ trace_events/ livepatch/ \
hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ \
- configfs/ connector/ v4l/
+ configfs/ connector/ v4l/ trace_printk/
diff --git a/samples/trace_printk/Makefile b/samples/trace_printk/Makefile
new file mode 100644
index 000000000000..19900ab2b00d
--- /dev/null
+++ b/samples/trace_printk/Makefile
@@ -0,0 +1,6 @@
+# builds a module that calls various trace_printk routines
+# then to use one (as root): insmod <module_name.ko>
+
+# This module can also be used to test the trace_printk code.
+
+obj-$(CONFIG_SAMPLE_TRACE_PRINTK) += trace-printk.o
diff --git a/samples/trace_printk/trace-printk.c b/samples/trace_printk/trace-printk.c
new file mode 100644
index 000000000000..e9e0040ff7be
--- /dev/null
+++ b/samples/trace_printk/trace-printk.c
@@ -0,0 +1,56 @@
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/irq_work.h>
+
+/* Must not be static to force gcc to consider these non constant */
+char *trace_printk_test_global_str =
+ "This is a dynamic string that will use trace_puts\n";
+
+char *trace_printk_test_global_str_irq =
+ "(irq) This is a dynamic string that will use trace_puts\n";
+
+char *trace_printk_test_global_str_fmt =
+ "%sThis is a %s that will use trace_printk\n";
+
+static struct irq_work irqwork;
+
+static void trace_printk_irq_work(struct irq_work *work)
+{
+ trace_printk("(irq) This is a static string that will use trace_bputs\n");
+ trace_printk(trace_printk_test_global_str_irq);
+
+ trace_printk("(irq) This is a %s that will use trace_bprintk()\n",
+ "static string");
+
+ trace_printk(trace_printk_test_global_str_fmt,
+ "(irq) ", "dynamic string");
+}
+
+static int __init trace_printk_init(void)
+{
+ init_irq_work(&irqwork, trace_printk_irq_work);
+
+ trace_printk("This is a static string that will use trace_bputs\n");
+ trace_printk(trace_printk_test_global_str);
+
+ /* Kick off printing in irq context */
+ irq_work_queue(&irqwork);
+
+ trace_printk("This is a %s that will use trace_bprintk()\n",
+ "static string");
+
+ trace_printk(trace_printk_test_global_str_fmt, "", "dynamic string");
+
+ return 0;
+}
+
+static void __exit trace_printk_exit(void)
+{
+}
+
+module_init(trace_printk_init);
+module_exit(trace_printk_exit);
+
+MODULE_AUTHOR("Steven Rostedt");
+MODULE_DESCRIPTION("trace-printk");
+MODULE_LICENSE("GPL");