[PATCH v2 1/5] dma-buf: heaps: Introduce DMA_HEAP_ALLOC_AND_READ_FILE heap flag
From: Huan Yang
Date: Tue Jul 30 2024 - 04:00:44 EST
Some users need to load a file into a dma-buf. The current way to do this,
sketched in the example below, is:
1. allocate a dma-buf and get the dma-buf fd
2. mmap the dma-buf fd into a user vaddr
3. read(file_fd, vaddr, fsz)
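A minimal userspace sketch of this flow, assuming the common
/dev/dma_heap/system heap node; error handling is omitted and the helper
name is made up for illustration:

  #include <fcntl.h>
  #include <sys/ioctl.h>
  #include <sys/mman.h>
  #include <sys/stat.h>
  #include <unistd.h>
  #include <linux/dma-heap.h>

  /* Load 'path' into a newly allocated dma-buf, returning the dma-buf fd. */
  static int load_file_into_dmabuf(const char *path)
  {
          struct dma_heap_allocation_data data = { 0 };
          struct stat st;
          void *vaddr;
          int heap_fd, file_fd;

          file_fd = open(path, O_RDONLY);
          fstat(file_fd, &st);

          heap_fd = open("/dev/dma_heap/system", O_RDWR);
          data.len = st.st_size;
          data.fd_flags = O_RDWR | O_CLOEXEC;
          ioctl(heap_fd, DMA_HEAP_IOCTL_ALLOC, &data);

          /*
           * Buffered read: every byte passes through the page cache first.
           * A real loader would loop here, since read() may return short.
           */
          vaddr = mmap(NULL, st.st_size, PROT_READ | PROT_WRITE,
                       MAP_SHARED, data.fd, 0);
          read(file_fd, vaddr, st.st_size);

          munmap(vaddr, st.st_size);
          close(heap_fd);
          close(file_fd);
          return data.fd;
  }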
Because dma-buf cannot support direct I/O (its pages cannot be pinned and
it is not purely page based), the file read must use buffered I/O.
This means that while reading the file into the dma-buf, page cache must be
allocated, and the file content is copied into the page cache first before
being copied into the dma-buf.
This approach worked well for relatively small files, since the page cache
can keep the file content resident and thus improve performance.
However, new challenges arise as AI models become larger and need to be
shared between DMA devices and the CPU via dma-buf.
For example, a current 3B model file is around 3.4GB. With the previous
method, that means generating 3.4GB of page cache (even if it is reclaimed
later) and copying 3.4GB of content between the page cache and the dma-buf.
Because system memory is limited, files in the gigabyte range cannot stay
in memory indefinitely, so this page cache offers little benefit for
subsequent reads. Moreover, keeping the page cache around consumes extra
system resources because of the additional copying done by the CPU.
Therefore, it is necessary for dma-buf to support direct I/O.
This patch provides a way to read the file content immediately after the
dma-buf is allocated, and to return the dma-buf file descriptor only once
the file has been fully read.
Since the dma-buf file descriptor has not been returned yet, no thread
other than the current one can access the buffer, so there are no race
conditions to worry about.
The dma-buf is mapped into the vmalloc area and the file read is initiated
from kernel space; both buffered I/O and direct I/O are supported.
This patch adds the DMA_HEAP_ALLOC_AND_READ_FILE heap flag for userspace.
When a user wants to allocate a dma-buf and read a file into it, they
should pass this flag. Since the size of the allocation is determined by
the file being read, there is no need to pass the 'len' parameter.
Instead, file_fd must be passed to tell the kernel which file to read, and
the file's open flags determine the read mode. Note that if direct I/O
(O_DIRECT) is used to read the file, the file size must be page aligned.
Therefore, for the user, len and file_fd are mutually exclusive,
and they are combined using a union.
Once the user obtains the dma-buf fd, the dma-buf already contains the
file content.
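For comparison, a sketch of the new flow; the heap node and file path are
placeholders and error handling is omitted:

  struct dma_heap_allocation_data data = { 0 };
  int heap_fd, file_fd;

  /*
   * Opening with O_DIRECT selects direct I/O; the file size must then be
   * page aligned. Without O_DIRECT, buffered I/O is used instead.
   */
  file_fd = open("/path/to/model.bin", O_RDONLY | O_DIRECT);
  heap_fd = open("/dev/dma_heap/system", O_RDWR);

  data.file_fd = file_fd;        /* takes the place of len via the union */
  data.fd_flags = O_RDWR | O_CLOEXEC;
  data.heap_flags = DMA_HEAP_ALLOC_AND_READ_FILE;

  /*
   * Returns only after the whole file has been read; data.fd now refers
   * to a dma-buf that already contains the file content.
   */
  ioctl(heap_fd, DMA_HEAP_IOCTL_ALLOC, &data);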
Signed-off-by: Huan Yang <link@xxxxxxxx>
---
drivers/dma-buf/dma-heap.c | 127 +++++++++++++++++++++++++++++++++-
include/uapi/linux/dma-heap.h | 11 ++-
2 files changed, 133 insertions(+), 5 deletions(-)
diff --git a/drivers/dma-buf/dma-heap.c b/drivers/dma-buf/dma-heap.c
index 2298ca5e112e..f19b944d4eaa 100644
--- a/drivers/dma-buf/dma-heap.c
+++ b/drivers/dma-buf/dma-heap.c
@@ -43,12 +43,128 @@ struct dma_heap {
struct cdev heap_cdev;
};
+/**
+ * struct dma_heap_file - wraps the file a dma_heap allocation reads from.
+ * @file: file to read from.
+ * @fsize: file size.
+ */
+struct dma_heap_file {
+ struct file *file;
+ size_t fsize;
+};
+
static LIST_HEAD(heap_list);
static DEFINE_MUTEX(heap_list_lock);
static dev_t dma_heap_devt;
static struct class *dma_heap_class;
static DEFINE_XARRAY_ALLOC(dma_heap_minors);
+static int init_dma_heap_file(struct dma_heap_file *heap_file, int file_fd)
+{
+ struct file *file;
+ size_t fsz;
+
+ file = fget(file_fd);
+ if (!file)
+ return -EINVAL;
+
+	/* Direct I/O only supports PAGE_SIZE-aligned files. */
+	fsz = i_size_read(file_inode(file));
+	if (file->f_flags & O_DIRECT && !PAGE_ALIGNED(fsz)) {
+		fput(file);
+		return -EINVAL;
+	}
+
+ heap_file->fsize = fsz;
+ heap_file->file = file;
+
+ return 0;
+}
+
+static void deinit_dma_heap_file(struct dma_heap_file *heap_file)
+{
+ fput(heap_file->file);
+}
+
+/**
+ * dma_heap_read_file_sync - read a file synchronously into a dma-buf.
+ *
+ * @dmabuf: the dma-buf we just allocated and exported.
+ * @heap_file: file info wrapper describing the file to read from.
+ *
+ * Whether buffered I/O or direct I/O is used depends on the flags the
+ * file was opened with.
+ * Note that direct I/O requires the file size to be page aligned.
+ * Since the read buffer is provided by the dma-buf itself, direct I/O
+ * fills the dma-buf with the file content directly, without an
+ * additional CPU copy through the page cache.
+ *
+ * Return: 0 on success, a negative error code otherwise.
+ */
+static int dma_heap_read_file_sync(struct dma_buf *dmabuf,
+ struct dma_heap_file *heap_file)
+{
+ struct iosys_map map;
+ ssize_t bytes;
+ int ret;
+
+ ret = dma_buf_vmap(dmabuf, &map);
+ if (ret)
+ return ret;
+
+	/*
+	 * kernel_read_file() handles the file read for us; if the
+	 * number of bytes returned does not match the file size,
+	 * the read failed.
+	 */
+ bytes = kernel_read_file(heap_file->file, 0, &map.vaddr, dmabuf->size,
+ &heap_file->fsize, READING_POLICY);
+ if (bytes != heap_file->fsize)
+ ret = -EIO;
+
+ dma_buf_vunmap(dmabuf, &map);
+
+ return ret;
+}
+
+static int dma_heap_buffer_alloc_and_read(struct dma_heap *heap, int file_fd,
+ u32 fd_flags, u64 heap_flags)
+{
+ struct dma_heap_file heap_file;
+ struct dma_buf *dmabuf;
+ int ret, fd;
+
+ ret = init_dma_heap_file(&heap_file, file_fd);
+ if (ret)
+ return ret;
+
+ dmabuf = heap->ops->allocate(heap, heap_file.fsize, fd_flags,
+ heap_flags);
+ if (IS_ERR(dmabuf)) {
+ ret = PTR_ERR(dmabuf);
+ goto error_file;
+ }
+
+ ret = dma_heap_read_file_sync(dmabuf, &heap_file);
+ if (ret)
+ goto error_put;
+
+ ret = dma_buf_fd(dmabuf, fd_flags);
+ if (ret < 0)
+ goto error_put;
+
+ fd = ret;
+
+ deinit_dma_heap_file(&heap_file);
+
+ return fd;
+
+error_put:
+ dma_buf_put(dmabuf);
+error_file:
+ deinit_dma_heap_file(&heap_file);
+
+ return ret;
+}
+
static int dma_heap_buffer_alloc(struct dma_heap *heap, size_t len,
u32 fd_flags,
u64 heap_flags)
@@ -108,9 +224,14 @@ static long dma_heap_ioctl_allocate(struct file *file, void *data)
if (heap_allocation->heap_flags & ~DMA_HEAP_VALID_HEAP_FLAGS)
return -EINVAL;
- fd = dma_heap_buffer_alloc(heap, heap_allocation->len,
- heap_allocation->fd_flags,
- heap_allocation->heap_flags);
+ if (heap_allocation->heap_flags & DMA_HEAP_ALLOC_AND_READ_FILE)
+ fd = dma_heap_buffer_alloc_and_read(
+ heap, heap_allocation->file_fd,
+ heap_allocation->fd_flags, heap_allocation->heap_flags);
+ else
+ fd = dma_heap_buffer_alloc(heap, heap_allocation->len,
+ heap_allocation->fd_flags,
+ heap_allocation->heap_flags);
if (fd < 0)
return fd;
diff --git a/include/uapi/linux/dma-heap.h b/include/uapi/linux/dma-heap.h
index a4cf716a49fa..ef2fbd885825 100644
--- a/include/uapi/linux/dma-heap.h
+++ b/include/uapi/linux/dma-heap.h
@@ -18,13 +18,17 @@
/* Valid FD_FLAGS are O_CLOEXEC, O_RDONLY, O_WRONLY, O_RDWR */
#define DMA_HEAP_VALID_FD_FLAGS (O_CLOEXEC | O_ACCMODE)
+/* Heap reads the file right after alloc; the len field carries a file fd */
+#define DMA_HEAP_ALLOC_AND_READ_FILE (1ULL << 0)
+
/* Currently no heap flags */
-#define DMA_HEAP_VALID_HEAP_FLAGS (0ULL)
+#define DMA_HEAP_VALID_HEAP_FLAGS (DMA_HEAP_ALLOC_AND_READ_FILE)
/**
* struct dma_heap_allocation_data - metadata passed from userspace for
* allocations
* @len: size of the allocation
+ * @file_fd: file descriptor to read the allocation from
* @fd: will be populated with a fd which provides the
* handle to the allocated dma-buf
* @fd_flags: file descriptor flags used when allocating
@@ -33,7 +37,10 @@
* Provided by userspace as an argument to the ioctl
*/
struct dma_heap_allocation_data {
- __u64 len;
+ union {
+ __u64 len;
+ __u32 file_fd;
+ };
__u32 fd;
__u32 fd_flags;
__u64 heap_flags;
--
2.45.2