[PATCH v3 2/6] rds: Brute force GFP_NOIO

From: Håkon Bugge
Date: Wed May 22 2024 - 09:56:02 EST


For most entry points to RDS, we bracket the function body with calls to
memalloc_noio_{save,restore} when enabled by the module parameter
force_noio.

We skip the calls to memalloc_noio_{save,restore} in rds_ioctl(), as
no memory allocations are executed in this function or its callees.

We execute memalloc_noio_{save,restore} in rds_poll() because of the
following call chain:

rds_poll()
poll_wait()
__pollwait()
poll_get_entry()
__get_free_page(GFP_KERNEL)

The function rds_setsockopt() allocates memory in its callees
rds_get_mr() and rds_get_mr_for_dest(). Hence, we need
memalloc_noio_{save,restore} in rds_setsockopt().

In rds_getsockopt(), we have rds_info_getsockopt() that allocates
memory. Hence, we need memalloc_noio_{save,restore} in
rds_getsockopt().

All of the above is done in order to conditionally enable RDS to become
a block I/O device.

Signed-off-by: Håkon Bugge <haakon.bugge@xxxxxxxxxx>

---

v1 -> v2:
* s/EXPORT_SYMBOL/static/ for the rds_force_noio variable as
pointed out by Simon
* Straightened the reverse xmas tree in two places
* Fixed C/P error in rds_cancel_sent_to() where I had two _save()s
and no _restore() as reported by Simon
---
net/rds/af_rds.c | 59 +++++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 56 insertions(+), 3 deletions(-)

diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 8435a20968ef5..846ad20b3783a 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -37,10 +37,15 @@
#include <linux/in.h>
#include <linux/ipv6.h>
#include <linux/poll.h>
+#include <linux/sched/mm.h>
#include <net/sock.h>

#include "rds.h"

+static bool rds_force_noio;
+module_param_named(force_noio, rds_force_noio, bool, 0444);
+MODULE_PARM_DESC(force_noio, "Force the use of GFP_NOIO (Y/N)");
+
/* this is just used for stats gathering :/ */
static DEFINE_SPINLOCK(rds_sock_lock);
static unsigned long rds_sock_count;
@@ -59,8 +64,12 @@ DECLARE_WAIT_QUEUE_HEAD(rds_poll_waitq);
static int rds_release(struct socket *sock)
{
struct sock *sk = sock->sk;
+ unsigned int noio_flags;
struct rds_sock *rs;

+ if (rds_force_noio)
+ noio_flags = memalloc_noio_save();
+
if (!sk)
goto out;

@@ -90,6 +99,8 @@ static int rds_release(struct socket *sock)
sock->sk = NULL;
sock_put(sk);
out:
+ if (rds_force_noio)
+ memalloc_noio_restore(noio_flags);
return 0;
}

@@ -214,9 +225,13 @@ static __poll_t rds_poll(struct file *file, struct socket *sock,
{
struct sock *sk = sock->sk;
struct rds_sock *rs = rds_sk_to_rs(sk);
+ unsigned int noio_flags;
__poll_t mask = 0;
unsigned long flags;

+ if (rds_force_noio)
+ noio_flags = memalloc_noio_save();
+
poll_wait(file, sk_sleep(sk), wait);

if (rs->rs_seen_congestion)
@@ -249,6 +264,8 @@ static __poll_t rds_poll(struct file *file, struct socket *sock,
if (mask)
rs->rs_seen_congestion = 0;

+ if (rds_force_noio)
+ memalloc_noio_restore(noio_flags);
return mask;
}

@@ -293,9 +310,13 @@ static int rds_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
static int rds_cancel_sent_to(struct rds_sock *rs, sockptr_t optval, int len)
{
struct sockaddr_in6 sin6;
+ unsigned int noio_flags;
struct sockaddr_in sin;
int ret = 0;

+ if (rds_force_noio)
+ noio_flags = memalloc_noio_save();
+
/* racing with another thread binding seems ok here */
if (ipv6_addr_any(&rs->rs_bound_addr)) {
ret = -ENOTCONN; /* XXX not a great errno */
@@ -324,6 +345,8 @@ static int rds_cancel_sent_to(struct rds_sock *rs, sockptr_t optval, int len)

rds_send_drop_to(rs, &sin6);
out:
+ if (rds_force_noio)
+ memalloc_noio_restore(noio_flags);
return ret;
}

@@ -485,8 +508,12 @@ static int rds_getsockopt(struct socket *sock, int level, int optname,
{
struct rds_sock *rs = rds_sk_to_rs(sock->sk);
int ret = -ENOPROTOOPT, len;
+ unsigned int noio_flags;
int trans;

+ if (rds_force_noio)
+ noio_flags = memalloc_noio_save();
+
if (level != SOL_RDS)
goto out;

@@ -529,6 +556,8 @@ static int rds_getsockopt(struct socket *sock, int level, int optname,
}

out:
+ if (rds_force_noio)
+ memalloc_noio_restore(noio_flags);
return ret;

}
@@ -538,12 +567,16 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr,
{
struct sock *sk = sock->sk;
struct sockaddr_in *sin;
+ unsigned int noio_flags;
struct rds_sock *rs = rds_sk_to_rs(sk);
int ret = 0;

if (addr_len < offsetofend(struct sockaddr, sa_family))
return -EINVAL;

+ if (rds_force_noio)
+ noio_flags = memalloc_noio_save();
+
lock_sock(sk);

switch (uaddr->sa_family) {
@@ -626,6 +659,8 @@ static int rds_connect(struct socket *sock, struct sockaddr *uaddr,
}

release_sock(sk);
+ if (rds_force_noio)
+ memalloc_noio_restore(noio_flags);
return ret;
}

@@ -697,16 +732,28 @@ static int __rds_create(struct socket *sock, struct sock *sk, int protocol)
static int rds_create(struct net *net, struct socket *sock, int protocol,
int kern)
{
+ unsigned int noio_flags;
struct sock *sk;
+ int ret;

if (sock->type != SOCK_SEQPACKET || protocol)
return -ESOCKTNOSUPPORT;

+ if (rds_force_noio)
+ noio_flags = memalloc_noio_save();
+
sk = sk_alloc(net, AF_RDS, GFP_KERNEL, &rds_proto, kern);
- if (!sk)
- return -ENOMEM;
+ if (!sk) {
+ ret = -ENOMEM;
+ goto out;
+ }

- return __rds_create(sock, sk, protocol);
+ ret = __rds_create(sock, sk, protocol);
+out:
+ if (rds_force_noio)
+ memalloc_noio_restore(noio_flags);
+
+ return ret;
}

void rds_sock_addref(struct rds_sock *rs)
@@ -895,8 +942,12 @@ u32 rds_gen_num;

static int __init rds_init(void)
{
+ unsigned int noio_flags;
int ret;

+ if (rds_force_noio)
+ noio_flags = memalloc_noio_save();
+
net_get_random_once(&rds_gen_num, sizeof(rds_gen_num));

ret = rds_bind_lock_init();
@@ -947,6 +998,8 @@ static int __init rds_init(void)
out_bind:
rds_bind_lock_destroy();
out:
+ if (rds_force_noio)
+ memalloc_noio_restore(noio_flags);
return ret;
}
module_init(rds_init);
--
2.31.1