Re: [Virtio-fs] [PATCH 3/2] fs: simplify get_filesystem_list / get_all_fs_names

From: Vivek Goyal
Date: Thu Jul 08 2021 - 08:59:50 EST


On Wed, Jul 07, 2021 at 05:06:36PM -0400, Vivek Goyal wrote:
> On Wed, Jul 07, 2021 at 05:04:04PM -0400, Vivek Goyal wrote:
> > On Wed, Jun 30, 2021 at 07:36:01AM +0200, Christoph Hellwig wrote:
> > > On Tue, Jun 29, 2021 at 04:50:48PM -0400, Vivek Goyal wrote:
> > > > Maybe we should modify mount_block_root() code so that it does not
> > > > require that extra "\0". Possibly zero initialize page and that should
> > > > make sure list_bdev_fs_names() does not have to worry about it.
> > > >
> > > > It is possible that a page gets full from the list of filesystems, and
> > > > last byte on page is terminating null. In that case just zeroing page
> > > > will not help. We can keep track of some sort of end pointer and make
> > > > sure we are not searching beyond that for valid filesystem types.
> > > >
> > > > end = page + PAGE_SIZE - 1;
> > > >
> > > > mount_block_root()
> > > > {
> > > > for (p = fs_names; p < end && *p; p += strlen(p)+1) {
> > > > }
> > > > }
> > >
> > > Maybe. To be honest I'd prefer to not even touch this unrelated code given
> > > how full of landmines it is :)
> >
> > Hi Christoph,
> >
> > How about the following patch? This applies on top of your patches. I noticed
> > that Al had suggested returning the number of filesystems from the helper
> > functions. I just did that and used that to iterate in the loop.
> >
> > I tested it with a virtual block device (root=/dev/vda1) and it works.
> > I also filled the page with garbage after allocation to make sure a naturally
> > occurring null is not there in the middle of the page to terminate the string.
> >
> > If you like it, can you please incorporate it in your patches.
>
> I noticed this will break with "root_fs_names=". Sorry, will have to
> fix split_fs_names() as well. Will do.

Hi Christoph,

I fixed it. Now both split_fs_names() and list_bdev_fs_names() return the
count of fstype strings they placed in the buffer. And callers now
use that count to loop (instead of relying on an extra null byte at the
end of the buffer).

I tested both nodev (virtiofs, 9p) and block dev rootfs (ext4) and
it works for me. Please have a look.

Thanks
Vivek


---
fs/filesystems.c | 5 ++++-
include/linux/fs.h | 2 +-
init/do_mounts.c | 35 +++++++++++++++++++++++------------
3 files changed, 28 insertions(+), 14 deletions(-)

Index: redhat-linux/fs/filesystems.c
===================================================================
--- redhat-linux.orig/fs/filesystems.c 2021-07-08 08:02:09.772766786 -0400
+++ redhat-linux/fs/filesystems.c 2021-07-08 08:02:12.044860918 -0400
@@ -209,10 +209,11 @@ SYSCALL_DEFINE3(sysfs, int, option, unsi
}
#endif

-void __init list_bdev_fs_names(char *buf, size_t size)
+int __init list_bdev_fs_names(char *buf, size_t size)
{
struct file_system_type *p;
size_t len;
+ int count = 0;

read_lock(&file_systems_lock);
for (p = file_systems; p; p = p->next) {
@@ -226,8 +227,10 @@ void __init list_bdev_fs_names(char *buf
memcpy(buf, p->name, len);
buf += len;
size -= len;
+ count++;
}
read_unlock(&file_systems_lock);
+ return count;
}

#ifdef CONFIG_PROC_FS
Index: redhat-linux/include/linux/fs.h
===================================================================
--- redhat-linux.orig/include/linux/fs.h 2021-07-08 08:02:09.774766869 -0400
+++ redhat-linux/include/linux/fs.h 2021-07-08 08:02:12.046861001 -0400
@@ -3622,7 +3622,7 @@ int proc_nr_dentry(struct ctl_table *tab
void *buffer, size_t *lenp, loff_t *ppos);
int proc_nr_inodes(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos);
-void __init list_bdev_fs_names(char *buf, size_t size);
+int __init list_bdev_fs_names(char *buf, size_t size);

#define __FMODE_EXEC ((__force int) FMODE_EXEC)
#define __FMODE_NONOTIFY ((__force int) FMODE_NONOTIFY)
Index: redhat-linux/init/do_mounts.c
===================================================================
--- redhat-linux.orig/init/do_mounts.c 2021-07-08 08:02:09.774766869 -0400
+++ redhat-linux/init/do_mounts.c 2021-07-08 08:02:12.046861001 -0400
@@ -338,14 +338,22 @@ __setup("rootflags=", root_data_setup);
__setup("rootfstype=", fs_names_setup);
__setup("rootdelay=", root_delay_setup);

-static void __init split_fs_names(char *page, char *names)
+static int __init split_fs_names(char *page, char *names)
{
- strcpy(page, root_fs_names);
- while (*page++) {
- if (page[-1] == ',')
- page[-1] = '\0';
+ int count = 0;
+ char *p = page;
+
+ strcpy(p, root_fs_names);
+ while (*p++) {
+ if (p[-1] == ',')
+ p[-1] = '\0';
}
- *page = '\0';
+ *p = '\0';
+
+ for (p = page; *p; p += strlen(p)+1)
+ count++;
+
+ return count;
}

static int __init do_mount_root(const char *name, const char *fs,
@@ -391,15 +399,16 @@ void __init mount_block_root(char *name,
char *fs_names = page_address(page);
char *p;
char b[BDEVNAME_SIZE];
+ int num_fs, i;

scnprintf(b, BDEVNAME_SIZE, "unknown-block(%u,%u)",
MAJOR(ROOT_DEV), MINOR(ROOT_DEV));
if (root_fs_names)
- split_fs_names(fs_names, root_fs_names);
+ num_fs = split_fs_names(fs_names, root_fs_names);
else
- list_bdev_fs_names(fs_names, PAGE_SIZE);
+ num_fs = list_bdev_fs_names(fs_names, PAGE_SIZE);
retry:
- for (p = fs_names; *p; p += strlen(p)+1) {
+ for (p = fs_names, i = 0; i < num_fs; p += strlen(p)+1, i++) {
int err = do_mount_root(name, p, flags, root_mount_data);
switch (err) {
case 0:
@@ -432,7 +441,7 @@ retry:
printk("List of all partitions:\n");
printk_all_partitions();
printk("No filesystem could mount root, tried: ");
- for (p = fs_names; *p; p += strlen(p)+1)
+ for (p = fs_names, i = 0; i < num_fs; p += strlen(p)+1, i++)
printk(" %s", p);
printk("\n");
panic("VFS: Unable to mount root fs on %s", b);
@@ -533,13 +542,15 @@ static int __init mount_nodev_root(void)
{
char *fs_names, *fstype;
int err = -EINVAL;
+ int num_fs, i;

fs_names = (void *)__get_free_page(GFP_KERNEL);
if (!fs_names)
return -EINVAL;
- split_fs_names(fs_names, root_fs_names);
+ num_fs = split_fs_names(fs_names, root_fs_names);

- for (fstype = fs_names; *fstype; fstype += strlen(fstype) + 1) {
+ for (fstype = fs_names, i = 0; i < num_fs;
+ fstype += strlen(fstype) + 1, i++) {
if (!fs_is_nodev(fstype))
continue;
err = do_mount_root(root_device_name, fstype, root_mountflags,