[PATCH v7 1/4] kernfs: make ->attr.open RCU protected.

From: Imran Khan
Date: Tue Jun 14 2022 - 22:11:26 EST


After removal of kernfs_open_node->refcnt in the previous patch,
kernfs_open_node_lock can be removed as well by making ->attr.open
RCU protected. kernfs_put_open_node can delegate freeing to ->attr.open
to RCU and other readers of ->attr.open can do so under rcu_read_(un)lock.

Suggested by: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
Signed-off-by: Imran Khan <imran.f.khan@xxxxxxxxxx>
---
fs/kernfs/file.c | 147 ++++++++++++++++++++++++++++-------------
include/linux/kernfs.h | 2 +-
2 files changed, 102 insertions(+), 47 deletions(-)

diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index e3abfa843879c..706aebcfb8f69 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -23,16 +23,16 @@
* for each kernfs_node with one or more open files.
*
* kernfs_node->attr.open points to kernfs_open_node. attr.open is
- * protected by kernfs_open_node_lock.
+ * RCU protected.
*
* filp->private_data points to seq_file whose ->private points to
* kernfs_open_file. kernfs_open_files are chained at
* kernfs_open_node->files, which is protected by kernfs_open_file_mutex.
*/
-static DEFINE_SPINLOCK(kernfs_open_node_lock);
static DEFINE_MUTEX(kernfs_open_file_mutex);

struct kernfs_open_node {
+ struct rcu_head rcu_head;
atomic_t event;
wait_queue_head_t poll;
struct list_head files; /* goes through kernfs_open_file.list */
@@ -51,6 +51,52 @@ struct kernfs_open_node {
static DEFINE_SPINLOCK(kernfs_notify_lock);
static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL;

+/**
+ * kernfs_deref_open_node - Get kernfs_open_node corresponding to @kn.
+ *
+ * @of: associated kernfs_open_file instance.
+ * @kn: target kernfs_node.
+ *
+ * Fetch and return ->attr.open of @kn if @of->list is non empty.
+ * If @of->list is not empty we can safely assume that @of is on
+ * @kn->attr.open->files list and this guarantees that @kn->attr.open
+ * will not vanish i.e. dereferencing outside RCU read-side critical
+ * section is safe here.
+ *
+ * The caller needs to make sure that @of->list is not empty.
+ */
+static struct kernfs_open_node *
+kernfs_deref_open_node(struct kernfs_open_file *of, struct kernfs_node *kn)
+{
+ struct kernfs_open_node *on;
+
+ on = rcu_dereference_check(kn->attr.open, !list_empty(&of->list));
+
+ return on;
+}
+
+/**
+ * kernfs_deref_open_node_protected - Get kernfs_open_node corresponding to @kn
+ *
+ * @kn: target kernfs_node.
+ *
+ * Fetch and return ->attr.open of @kn when caller holds the
+ * kernfs_open_file_mutex.
+ *
+ * Update of ->attr.open happens under kernfs_open_file_mutex. So when
+ * the caller guarantees that this mutex is being held, other updaters can't
+ * change ->attr.open and this means that we can safely deref ->attr.open
+ * outside RCU read-side critical section.
+ *
+ * The caller needs to make sure that kernfs_open_file_mutex is held.
+ */
+static struct kernfs_open_node *
+kernfs_deref_open_node_protected(struct kernfs_node *kn)
+{
+ return rcu_dereference_protected(kn->attr.open,
+ lockdep_is_held(&kernfs_open_file_mutex));
+}
+
static struct kernfs_open_file *kernfs_of(struct file *file)
{
return ((struct seq_file *)file->private_data)->private;
@@ -156,8 +202,12 @@ static void kernfs_seq_stop(struct seq_file *sf, void *v)
static int kernfs_seq_show(struct seq_file *sf, void *v)
{
struct kernfs_open_file *of = sf->private;
+ struct kernfs_open_node *on = kernfs_deref_open_node(of, of->kn);

- of->event = atomic_read(&of->kn->attr.open->event);
+ if (!on)
+ return -EINVAL;
+
+ of->event = atomic_read(&on->event);

return of->kn->attr.ops->seq_show(sf, v);
}
@@ -180,6 +230,7 @@ static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
struct kernfs_open_file *of = kernfs_of(iocb->ki_filp);
ssize_t len = min_t(size_t, iov_iter_count(iter), PAGE_SIZE);
const struct kernfs_ops *ops;
+ struct kernfs_open_node *on;
char *buf;

buf = of->prealloc_buf;
@@ -201,7 +252,15 @@ static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
goto out_free;
}

- of->event = atomic_read(&of->kn->attr.open->event);
+ on = kernfs_deref_open_node(of, of->kn);
+ if (!on) {
+ len = -EINVAL;
+ mutex_unlock(&of->mutex);
+ goto out_free;
+ }
+
+ of->event = atomic_read(&on->event);
+
ops = kernfs_ops(of->kn);
if (ops->read)
len = ops->read(of, buf, len, iocb->ki_pos);
@@ -519,36 +578,29 @@ static int kernfs_get_open_node(struct kernfs_node *kn,
{
struct kernfs_open_node *on, *new_on = NULL;

- retry:
mutex_lock(&kernfs_open_file_mutex);
- spin_lock_irq(&kernfs_open_node_lock);
-
- if (!kn->attr.open && new_on) {
- kn->attr.open = new_on;
- new_on = NULL;
- }
-
- on = kn->attr.open;
- if (on)
- list_add_tail(&of->list, &on->files);
-
- spin_unlock_irq(&kernfs_open_node_lock);
- mutex_unlock(&kernfs_open_file_mutex);
+ on = kernfs_deref_open_node_protected(kn);

if (on) {
- kfree(new_on);
+ list_add_tail(&of->list, &on->files);
+ mutex_unlock(&kernfs_open_file_mutex);
return 0;
+ } else {
+ /* not there, initialize a new one */
+ new_on = kmalloc(sizeof(*new_on), GFP_KERNEL);
+ if (!new_on) {
+ mutex_unlock(&kernfs_open_file_mutex);
+ return -ENOMEM;
+ }
+ atomic_set(&new_on->event, 1);
+ init_waitqueue_head(&new_on->poll);
+ INIT_LIST_HEAD(&new_on->files);
+ list_add_tail(&of->list, &new_on->files);
+ rcu_assign_pointer(kn->attr.open, new_on);
}
+ mutex_unlock(&kernfs_open_file_mutex);

- /* not there, initialize a new one and retry */
- new_on = kmalloc(sizeof(*new_on), GFP_KERNEL);
- if (!new_on)
- return -ENOMEM;
-
- atomic_set(&new_on->event, 1);
- init_waitqueue_head(&new_on->poll);
- INIT_LIST_HEAD(&new_on->files);
- goto retry;
+ return 0;
}

/**
@@ -567,24 +619,25 @@ static int kernfs_get_open_node(struct kernfs_node *kn,
static void kernfs_unlink_open_file(struct kernfs_node *kn,
struct kernfs_open_file *of)
{
- struct kernfs_open_node *on = kn->attr.open;
- unsigned long flags;
+ struct kernfs_open_node *on;

mutex_lock(&kernfs_open_file_mutex);
- spin_lock_irqsave(&kernfs_open_node_lock, flags);
+
+ on = kernfs_deref_open_node_protected(kn);
+ if (!on) {
+ mutex_unlock(&kernfs_open_file_mutex);
+ return;
+ }

if (of)
list_del(&of->list);

- if (list_empty(&on->files))
- kn->attr.open = NULL;
- else
- on = NULL;
+ if (list_empty(&on->files)) {
+ rcu_assign_pointer(kn->attr.open, NULL);
+ kfree_rcu(on, rcu_head);
+ }

- spin_unlock_irqrestore(&kernfs_open_node_lock, flags);
mutex_unlock(&kernfs_open_file_mutex);
-
- kfree(on);
}

static int kernfs_fop_open(struct inode *inode, struct file *file)
@@ -774,17 +827,16 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
* check under kernfs_open_file_mutex will ensure bailing out if
* ->attr.open became NULL while waiting for the mutex.
*/
- if (!kn->attr.open)
+ if (!rcu_access_pointer(kn->attr.open))
return;

mutex_lock(&kernfs_open_file_mutex);
- if (!kn->attr.open) {
+ on = kernfs_deref_open_node_protected(kn);
+ if (!on) {
mutex_unlock(&kernfs_open_file_mutex);
return;
}

- on = kn->attr.open;
-
list_for_each_entry(of, &on->files, list) {
struct inode *inode = file_inode(of->file);

@@ -815,7 +867,10 @@ void kernfs_drain_open_files(struct kernfs_node *kn)
__poll_t kernfs_generic_poll(struct kernfs_open_file *of, poll_table *wait)
{
struct kernfs_node *kn = kernfs_dentry_node(of->file->f_path.dentry);
- struct kernfs_open_node *on = kn->attr.open;
+ struct kernfs_open_node *on = kernfs_deref_open_node(of, kn);
+
+ if (!on)
+ return EPOLLERR;

poll_wait(of->file, &on->poll, wait);

@@ -922,13 +977,13 @@ void kernfs_notify(struct kernfs_node *kn)
return;

/* kick poll immediately */
- spin_lock_irqsave(&kernfs_open_node_lock, flags);
- on = kn->attr.open;
+ rcu_read_lock();
+ on = rcu_dereference(kn->attr.open);
if (on) {
atomic_inc(&on->event);
wake_up_interruptible(&on->poll);
}
- spin_unlock_irqrestore(&kernfs_open_node_lock, flags);
+ rcu_read_unlock();

/* schedule work to kick fsnotify */
spin_lock_irqsave(&kernfs_notify_lock, flags);
diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h
index e2ae15a6225e8..13f54f078a52a 100644
--- a/include/linux/kernfs.h
+++ b/include/linux/kernfs.h
@@ -114,7 +114,7 @@ struct kernfs_elem_symlink {

struct kernfs_elem_attr {
const struct kernfs_ops *ops;
- struct kernfs_open_node *open;
+ struct kernfs_open_node __rcu *open;
loff_t size;
struct kernfs_node *notify_next; /* for kernfs_notify() */
};
--
2.30.2