On 2025/1/5 23:12, Hongzhen Luo wrote:
This modifies relevant functions to apply the page cache share feature.

[Reviewer's inline comment:] Can we lock at folio granularity? The erofs_pcshr_private mutex may limit concurrency when reading.
Below is the memory usage for reading all files in two different minor
versions of container images:
+-------------------+------------------+-------------+---------------+
|       Image       | Page Cache Share | Memory (MB) |    Memory     |
|                   |                  |             | Reduction (%) |
+-------------------+------------------+-------------+---------------+
|                   |        No        |     241     |       -       |
|       redis       +------------------+-------------+---------------+
|   7.2.4 & 7.2.5   |       Yes        |     163     |      33%      |
+-------------------+------------------+-------------+---------------+
|                   |        No        |     872     |       -       |
|     postgres      +------------------+-------------+---------------+
|    16.1 & 16.2    |       Yes        |     630     |      28%      |
+-------------------+------------------+-------------+---------------+
|                   |        No        |    2771     |       -       |
|    tensorflow     +------------------+-------------+---------------+
|  1.11.0 & 2.11.1  |       Yes        |    2340     |      16%      |
+-------------------+------------------+-------------+---------------+
|                   |        No        |     926     |       -       |
|       mysql       +------------------+-------------+---------------+
|  8.0.11 & 8.0.12  |       Yes        |     735     |      21%      |
+-------------------+------------------+-------------+---------------+
|                   |        No        |     390     |       -       |
|       nginx       +------------------+-------------+---------------+
|   7.2.4 & 7.2.5   |       Yes        |     219     |      44%      |
+-------------------+------------------+-------------+---------------+
|      tomcat       |        No        |     924     |       -       |
| 10.1.25 & 10.1.26 +------------------+-------------+---------------+
|                   |       Yes        |     474     |      49%      |
+-------------------+------------------+-------------+---------------+
Additionally, the table below shows the runtime memory usage of the
container:
+-------------------+------------------+-------------+---------------+
|       Image       | Page Cache Share | Memory (MB) |    Memory     |
|                   |                  |             | Reduction (%) |
+-------------------+------------------+-------------+---------------+
|                   |        No        |     35      |       -       |
|       redis       +------------------+-------------+---------------+
|   7.2.4 & 7.2.5   |       Yes        |     28      |      20%      |
+-------------------+------------------+-------------+---------------+
|                   |        No        |     149     |       -       |
|     postgres      +------------------+-------------+---------------+
|    16.1 & 16.2    |       Yes        |     95      |      37%      |
+-------------------+------------------+-------------+---------------+
|                   |        No        |    1028     |       -       |
|    tensorflow     +------------------+-------------+---------------+
|  1.11.0 & 2.11.1  |       Yes        |     930     |      10%      |
+-------------------+------------------+-------------+---------------+
|                   |        No        |     155     |       -       |
|       mysql       +------------------+-------------+---------------+
|  8.0.11 & 8.0.12  |       Yes        |     132     |      15%      |
+-------------------+------------------+-------------+---------------+
|                   |        No        |     25      |       -       |
|       nginx       +------------------+-------------+---------------+
|   7.2.4 & 7.2.5   |       Yes        |     20      |      20%      |
+-------------------+------------------+-------------+---------------+
|      tomcat       |        No        |     186     |       -       |
| 10.1.25 & 10.1.26 +------------------+-------------+---------------+
|                   |       Yes        |     98      |      48%      |
+-------------------+------------------+-------------+---------------+
Signed-off-by: Hongzhen Luo <hongzhen@xxxxxxxxxxxxxxxxx>
---
fs/erofs/data.c | 14 +++++++--
fs/erofs/inode.c | 5 ++-
fs/erofs/pagecache_share.c | 63 ++++++++++++++++++++++++++++++++++++++
fs/erofs/pagecache_share.h | 11 +++++++
fs/erofs/super.c | 7 +++++
fs/erofs/zdata.c | 9 ++++--
6 files changed, 104 insertions(+), 5 deletions(-)
diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index 0cd6b5c4df98..fb08acbeaab6 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -5,6 +5,7 @@
* Copyright (C) 2021, Alibaba Cloud
*/
#include "internal.h"
+#include "pagecache_share.h"
#include <linux/sched/mm.h>
#include <trace/events/erofs.h>
@@ -370,12 +371,21 @@ int erofs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
*/
static int erofs_read_folio(struct file *file, struct folio *folio)
{
- return iomap_read_folio(folio, &erofs_iomap_ops);
+ int ret, pcshr;
+
+ pcshr = erofs_pcshr_read_begin(file, folio);
+ ret = iomap_read_folio(folio, &erofs_iomap_ops);
+ erofs_pcshr_read_end(file, folio, pcshr);
+ return ret;
}
static void erofs_readahead(struct readahead_control *rac)
{
- return iomap_readahead(rac, &erofs_iomap_ops);
+ int pcshr;
+
+ pcshr = erofs_pcshr_readahead_begin(rac);
+ iomap_readahead(rac, &erofs_iomap_ops);
+ erofs_pcshr_readahead_end(rac, pcshr);
}
static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
diff --git a/fs/erofs/inode.c b/fs/erofs/inode.c
index d4b89407822a..0b070f4b46b8 100644
--- a/fs/erofs/inode.c
+++ b/fs/erofs/inode.c
@@ -5,6 +5,7 @@
* Copyright (C) 2021, Alibaba Cloud
*/
#include "xattr.h"
+#include "pagecache_share.h"
#include <trace/events/erofs.h>
static int erofs_fill_symlink(struct inode *inode, void *kaddr,
@@ -212,7 +213,9 @@ static int erofs_fill_inode(struct inode *inode)
switch (inode->i_mode & S_IFMT) {
case S_IFREG:
inode->i_op = &erofs_generic_iops;
- if (erofs_inode_is_data_compressed(vi->datalayout))
+ if (erofs_pcshr_fill_inode(inode) == 0)
+ inode->i_fop = &erofs_pcshr_fops;
+ else if (erofs_inode_is_data_compressed(vi->datalayout))
inode->i_fop = &generic_ro_fops;
else
inode->i_fop = &erofs_file_fops;
diff --git a/fs/erofs/pagecache_share.c b/fs/erofs/pagecache_share.c
index 703fd17c002c..22172b5e21c7 100644
--- a/fs/erofs/pagecache_share.c
+++ b/fs/erofs/pagecache_share.c
@@ -22,6 +22,7 @@ struct erofs_pcshr_counter {
struct erofs_pcshr_private {
char fprt[PCSHR_FPRT_MAXLEN];
+ struct mutex mutex;
};
static struct erofs_pcshr_counter mnt_counter = {
@@ -84,6 +85,7 @@ static int erofs_fprt_set(struct inode *inode, void *data)
if (!ano_private)
return -ENOMEM;
memcpy(ano_private, data, sizeof(size_t) + *(size_t *)data);
+ mutex_init(&ano_private->mutex);
inode->i_private = ano_private;
return 0;
}
@@ -226,3 +228,64 @@ const struct file_operations erofs_pcshr_fops = {
.get_unmapped_area = thp_get_unmapped_area,
.splice_read = filemap_splice_read,
};
+
+int erofs_pcshr_read_begin(struct file *file, struct folio *folio)
+{
+ struct erofs_inode *vi;
+ struct erofs_pcshr_private *ano_private;
+
+ if (!(file && file->private_data))
+ return 0;
+
+ vi = file->private_data;
+ if (vi->ano_inode != file_inode(file))
+ return 0;
+
+ ano_private = vi->ano_inode->i_private;
+ mutex_lock(&ano_private->mutex);