Re: [PATCH v3 2/2] initramfs: introduce do_readxattrs()

From: hpa
Date: Fri May 17 2019 - 16:20:51 EST


On May 17, 2019 9:55:19 AM PDT, Roberto Sassu <roberto.sassu@xxxxxxxxxx> wrote:
>This patch adds support for an alternative method to add xattrs to
>files in
>the rootfs filesystem. Instead of extracting them directly from the ram
>disk image, they are extracted from a regular file called .xattr-list,
>that
>can be added by any ram disk generator available today. The file format
>is:
>
><file #N data len (ASCII, 10 chars)><file #N path>\0
><xattr #N data len (ASCII, 8 chars)><xattr #N name>\0<xattr #N value>
>
>.xattr-list can be generated by executing:
>
>$ getfattr --absolute-names -d -h -R -e hex -m - \
> <file list> | xattr.awk -b > ${initdir}/.xattr-list
>
>where the content of the xattr.awk script is:
>
>#! /usr/bin/awk -f
>{
> if (!length($0)) {
> printf("%.10x%s\0", len, file);
> for (x in xattr) {
> printf("%.8x%s\0", xattr_len[x], x);
> for (i = 0; i < length(xattr[x]) / 2; i++) {
> printf("%c", strtonum("0x"substr(xattr[x], i * 2 + 1, 2)));
> }
> }
> i = 0;
> delete xattr;
> delete xattr_len;
> next;
> };
> if (i == 0) {
> file=$3;
> len=length(file) + 8 + 1;
> }
> if (i > 0) {
> split($0, a, "=");
> xattr[a[1]]=substr(a[2], 3);
> xattr_len[a[1]]=length(a[1]) + 1 + 8 + length(xattr[a[1]]) / 2;
> len+=xattr_len[a[1]];
> };
> i++;
>}
>
>Signed-off-by: Roberto Sassu <roberto.sassu@xxxxxxxxxx>
>---
> init/initramfs.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++++
> 1 file changed, 99 insertions(+)
>
>diff --git a/init/initramfs.c b/init/initramfs.c
>index 0c6dd1d5d3f6..6ec018c6279a 100644
>--- a/init/initramfs.c
>+++ b/init/initramfs.c
>@@ -13,6 +13,8 @@
> #include <linux/namei.h>
> #include <linux/xattr.h>
>
>+#define XATTR_LIST_FILENAME ".xattr-list"
>+
> static ssize_t __init xwrite(int fd, const char *p, size_t count)
> {
> ssize_t out = 0;
>@@ -382,6 +384,97 @@ static int __init __maybe_unused do_setxattrs(char
>*pathname)
> return 0;
> }
>
>+struct path_hdr {
>+ char p_size[10]; /* total size including p_size field */
>+ char p_data[]; /* <path>\0<xattrs> */
>+};
>+
>+static int __init do_readxattrs(void)
>+{
>+ struct path_hdr hdr;
>+ char *path = NULL;
>+ char str[sizeof(hdr.p_size) + 1];
>+ unsigned long file_entry_size;
>+ size_t size, path_size, total_size;
>+ struct kstat st;
>+ struct file *file;
>+ loff_t pos;
>+ int ret;
>+
>+ ret = vfs_lstat(XATTR_LIST_FILENAME, &st);
>+ if (ret < 0)
>+ return ret;
>+
>+ total_size = st.size;
>+
>+ file = filp_open(XATTR_LIST_FILENAME, O_RDONLY, 0);
>+ if (IS_ERR(file))
>+ return PTR_ERR(file);
>+
>+ pos = file->f_pos;
>+
>+ while (total_size) {
>+ size = kernel_read(file, (char *)&hdr, sizeof(hdr), &pos);
>+ if (size != sizeof(hdr)) {
>+ ret = -EIO;
>+ goto out;
>+ }
>+
>+ total_size -= size;
>+
>+ str[sizeof(hdr.p_size)] = 0;
>+ memcpy(str, hdr.p_size, sizeof(hdr.p_size));
>+ ret = kstrtoul(str, 16, &file_entry_size);
>+ if (ret < 0)
>+ goto out;
>+
>+ file_entry_size -= sizeof(sizeof(hdr.p_size));
>+ if (file_entry_size > total_size) {
>+ ret = -EINVAL;
>+ goto out;
>+ }
>+
>+ path = vmalloc(file_entry_size);
>+ if (!path) {
>+ ret = -ENOMEM;
>+ goto out;
>+ }
>+
>+ size = kernel_read(file, path, file_entry_size, &pos);
>+ if (size != file_entry_size) {
>+ ret = -EIO;
>+ goto out_free;
>+ }
>+
>+ total_size -= size;
>+
>+ path_size = strnlen(path, file_entry_size);
>+ if (path_size == file_entry_size) {
>+ ret = -EINVAL;
>+ goto out_free;
>+ }
>+
>+ xattr_buf = path + path_size + 1;
>+ xattr_len = file_entry_size - path_size - 1;
>+
>+ ret = do_setxattrs(path);
>+ vfree(path);
>+ path = NULL;
>+
>+ if (ret < 0)
>+ break;
>+ }
>+out_free:
>+ vfree(path);
>+out:
>+ fput(file);
>+
>+ if (ret < 0)
>+ error("Unable to parse xattrs");
>+
>+ return ret;
>+}
>+
> static __initdata int wfd;
>
> static int __init do_name(void)
>@@ -391,6 +484,11 @@ static int __init do_name(void)
> if (strcmp(collected, "TRAILER!!!") == 0) {
> free_hash();
> return 0;
>+ } else if (strcmp(collected, XATTR_LIST_FILENAME) == 0) {
>+ struct kstat st;
>+
>+ if (!vfs_lstat(collected, &st))
>+ do_readxattrs();
> }
> clean_path(collected, mode);
> if (S_ISREG(mode)) {
>@@ -562,6 +660,7 @@ static char * __init unpack_to_rootfs(char *buf,
>unsigned long len)
> buf += my_inptr;
> len -= my_inptr;
> }
>+ do_readxattrs();
> dir_utime();
> kfree(name_buf);
> kfree(symlink_buf);

Ok... I just realized this does not work for a modular initramfs, composed at load time from multiple files, which is a very real problem. It should be easy enough to deal with: instead of one large file, use one companion file per source file, perhaps something like filename..xattrs (I suggest double dots to make it less likely to conflict with a "real" file). No leading dot, as it makes it more likely that archivers will sort them before the file proper.

A side benefit is that the format can be simpler as there is no need to encode the filename.

A technically cleaner solution still, but which would need archiver modifications, would be to encode the xattrs as an optionally nameless file (just an empty string) with a new file mode value, immediately following the original file. The advantage there is that the archiver itself could support xattrs and other extended metadata (which has been requested elsewhere); the disadvantage obviously is that it requires new support in the archiver. However, at least it ought to be simpler since it is still a higher protocol level than the cpio archive itself.

There's already one special case in cpio, which is the "TRAILER!!!" filename; although I don't think it is part of the formal spec, to the extent there is one, I would expect that in practice it is always encoded with a mode of 0, which incidentally could be used to unbreak the case where such a filename actually exists. So one way to support such extended metadata would be to set mode to 0 and use the filename to encode the type of metadata. I wonder how existing GNU or BSD cpio (the BSD one is better maintained these days) would deal with reading such a file; it would at least not be a regression if it just read it still, possibly with warnings. It could also be possible to use bits 17:16 in the mode, which are traditionally always zero (mode_t being 16 bits), but I believe are present in most or all of the cpio formats for historical reasons. It might be accepted better by existing implementations to use one of these high bits combined with S_IFREG, I don't know.

--
Sent from my Android device with K-9 Mail. Please excuse my brevity.