Re: [PATCH 15/19] vfs: Add superblock notifications [ver #16]

From: Jann Horn
Date: Wed Feb 19 2020 - 18:08:39 EST


On Tue, Feb 18, 2020 at 6:07 PM David Howells <dhowells@xxxxxxxxxx> wrote:
> Add a superblock event notification facility whereby notifications about
> superblock events, such as I/O errors (EIO), quota limits being hit
> (EDQUOT) and running out of space (ENOSPC) can be reported to a monitoring
> process asynchronously. Note that this does not cover vfsmount topology
> changes. watch_mount() is used for that.
[...]
> @@ -354,6 +356,10 @@ void deactivate_locked_super(struct super_block *s)
> {
> struct file_system_type *fs = s->s_type;
> if (atomic_dec_and_test(&s->s_active)) {
> +#ifdef CONFIG_SB_NOTIFICATIONS
> + if (s->s_watchers)
> + remove_watch_list(s->s_watchers, s->s_unique_id);
> +#endif
> cleancache_invalidate_fs(s);
> unregister_shrinker(&s->s_shrink);
> fs->kill_sb(s);
[...]
> +/**
> + * sys_watch_sb - Watch for superblock events.
> + * @dfd: Base directory to pathwalk from or fd referring to superblock.
> + * @filename: Path to superblock to place the watch upon
> + * @at_flags: Pathwalk control flags
> + * @watch_fd: The watch queue to send notifications to.
> + * @watch_id: The watch ID to be placed in the notification (-1 to remove watch)
> + */
> +SYSCALL_DEFINE5(watch_sb,
> + int, dfd,
> + const char __user *, filename,
> + unsigned int, at_flags,
> + int, watch_fd,
> + int, watch_id)
> +{
> + struct watch_queue *wqueue;
> + struct super_block *s;
> + struct watch_list *wlist = NULL;
> + struct watch *watch = NULL;
> + struct path path;
> + unsigned int lookup_flags =
> + LOOKUP_DIRECTORY | LOOKUP_FOLLOW | LOOKUP_AUTOMOUNT;
> + int ret;
[...]
> + wqueue = get_watch_queue(watch_fd);
> + if (IS_ERR(wqueue))
> + goto err_path;
> +
> + s = path.dentry->d_sb;
> + if (watch_id >= 0) {
> + ret = -ENOMEM;
> + if (!s->s_watchers) {

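READ_ONCE()? s->s_watchers is read here before s_umount is taken, while
it is only assigned with s_umount held further down, so this is a
lockless read.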

> + wlist = kzalloc(sizeof(*wlist), GFP_KERNEL);
> + if (!wlist)
> + goto err_wqueue;
> + init_watch_list(wlist, NULL);
> + }
> +
> + watch = kzalloc(sizeof(*watch), GFP_KERNEL);
> + if (!watch)
> + goto err_wlist;
> +
> + init_watch(watch, wqueue);
> + watch->id = s->s_unique_id;
> + watch->private = s;
> + watch->info_id = (u32)watch_id << 24;
> +
> + ret = security_watch_sb(watch, s);
> + if (ret < 0)
> + goto err_watch;
> +
> + down_write(&s->s_umount);
> + ret = -EIO;
> + if (atomic_read(&s->s_active)) {
> + if (!s->s_watchers) {
> + s->s_watchers = wlist;
> + wlist = NULL;
> + }
> +
> + ret = add_watch_to_object(watch, s->s_watchers);
> + if (ret == 0) {
> + spin_lock(&sb_lock);
> + s->s_count++;
> + spin_unlock(&sb_lock);

Where is the corresponding decrement of s->s_count? I'm guessing that
it should be in the ->release_watch() handler, except that there isn't
one...

> + watch = NULL;
> + }
> + }
> + up_write(&s->s_umount);
> + } else {
> + ret = -EBADSLT;
> + if (READ_ONCE(s->s_watchers)) {

(Nit: I don't get why you do a lockless check here before taking the
lock - it'd be more straightforward to take the lock first, and it's
not like you want to optimize for the case where someone calls
sys_watch_sb() with invalid arguments...)

> + down_write(&s->s_umount);
> + ret = remove_watch_from_object(s->s_watchers, wqueue,
> + s->s_unique_id, false);
> + up_write(&s->s_umount);
> + }
> + }
> +
> +err_watch:
> + kfree(watch);
> +err_wlist:
> + kfree(wlist);
> +err_wqueue:
> + put_watch_queue(wqueue);
> +err_path:
> + path_put(&path);
> + return ret;
> +}
> +#endif
[...]
> +/**
> + * notify_sb: Post simple superblock notification.
> + * @s: The superblock the notification is about.
> + * @subtype: The type of notification.
> + * @info: WATCH_INFO_FLAG_* flags to be set in the record.
> + */
> +static inline void notify_sb(struct super_block *s,
> + enum superblock_notification_type subtype,
> + u32 info)
> +{
> +#ifdef CONFIG_SB_NOTIFICATIONS
> + if (unlikely(s->s_watchers)) {

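READ_ONCE()? This looks like another lockless read of s->s_watchers; the
removal path in sys_watch_sb() already uses READ_ONCE(s->s_watchers) for
its unlocked check.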

> + struct superblock_notification n = {
> + .watch.type = WATCH_TYPE_SB_NOTIFY,
> + .watch.subtype = subtype,
> + .watch.info = watch_sizeof(n) | info,
> + .sb_id = s->s_unique_id,
> + };
> +
> + post_sb_notification(s, &n);
> + }
> +
> +#endif
> +}
> +
> +/**
> + * notify_sb_error: Post superblock error notification.
> + * @s: The superblock the notification is about.
> + * @error: The error number to be recorded.
> + */
> +static inline int notify_sb_error(struct super_block *s, int error)
> +{
> +#ifdef CONFIG_SB_NOTIFICATIONS
> + if (unlikely(s->s_watchers)) {

READ_ONCE()? Same as in notify_sb(): s->s_watchers appears to be read
here without a lock.

> + struct superblock_error_notification n = {
> + .s.watch.type = WATCH_TYPE_SB_NOTIFY,
> + .s.watch.subtype = NOTIFY_SUPERBLOCK_ERROR,
> + .s.watch.info = watch_sizeof(n),
> + .s.sb_id = s->s_unique_id,
> + .error_number = error,
> + .error_cookie = 0,
> + };
> +
> + post_sb_notification(s, &n.s);
> + }
> +#endif
> + return error;
> +}