[PATCH 5.3 016/193] net/tls: add a TX lock

From: Greg Kroah-Hartman
Date: Mon Nov 11 2019 - 13:48:03 EST


From: Jakub Kicinski <jakub.kicinski@xxxxxxxxxxxxx>

[ Upstream commit 79ffe6087e9145d2377385cac48d0d6a6b4225a5 ]

TLS TX needs to release and re-acquire the socket lock if the send
buffer fills up.
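
For context, a minimal sketch (assuming the stock sk_stream_* helpers;
the exact call chain in tls_sw differs) of why lock_sock() alone cannot
serialize the TX path:

    lock_sock(sk);
    while (!sk_stream_memory_free(sk)) {
            /* Sleeps until sndbuf space is available; internally
             * releases the socket lock and re-acquires it on wake-up,
             * so another writer can enter the TX path meanwhile.
             */
            ret = sk_stream_wait_memory(sk, &timeo);
            if (ret)
                    goto out;
    }
    /* ... push the pending record ... */
    out:
    release_sock(sk);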

The TLS SW TX path currently depends on only one thread being allowed
to enter the function, enforced by abusing sk_write_pending: if another
writer is already waiting for memory, no new ones are allowed in.

This has two problems:
- writers don't wake other threads up when they leave the kernel,
  meaning that this scheme only works for a single extra thread (a
  second application thread or the delayed work) because memory
  becoming available sends one wake-up request; as Mallesham and
  Pooja report, with a larger number of threads it leads to threads
  being put to sleep indefinitely (the old gating is sketched after
  this list);
- the delayed work does not get _scheduled_ but it may _run_ when
  other writers are present, leading to crashes as writers don't
  expect state to change under their feet (the same records get
  pushed and freed multiple times); it's hard to reliably bail from
  the work, however, because the mere presence of a writer does not
  guarantee that the writer will push pending records before exiting.
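
The gating being removed (quoted verbatim from the tls_sw hunks below)
was the only thing keeping extra writers out, and nothing wakes the
remaining waiters when the thread that entered first returns:

    /* Wait till there is any pending write on socket */
    if (unlikely(sk->sk_write_pending)) {
            ret = wait_on_pending_writer(sk, &timeo);
            if (unlikely(ret))
                    goto send_end;
    }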

Ensuring wakeups always happen would make the code essentially
open-code a mutex. Just use a mutex.
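
The resulting locking order, applied uniformly to sendmsg, sendpage
and the tx_work handler in the hunks below (tx_lock is taken outside
the socket lock because the socket lock may be dropped while waiting
for memory):

    mutex_lock(&tls_ctx->tx_lock);
    lock_sock(sk);
    /* ... TX work; may transiently drop the socket lock, but tx_lock
     * keeps other writers and the delayed work out ...
     */
    release_sock(sk);
    mutex_unlock(&tls_ctx->tx_lock);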

The TLS HW TX path does not have any locking (not even the
sk_write_pending hack), yet it uses a per-socket sg_tx_data
array to push records.

Fixes: a42055e8d2c3 ("net/tls: Add support for async encryption of records for performance")
Reported-by: Mallesham Jatharakonda <mallesh537@xxxxxxxxx>
Reported-by: Pooja Trivedi <poojatrivedi@xxxxxxxxx>
Signed-off-by: Jakub Kicinski <jakub.kicinski@xxxxxxxxxxxxx>
Reviewed-by: Simon Horman <simon.horman@xxxxxxxxxxxxx>
Signed-off-by: David S. Miller <davem@xxxxxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
---
include/net/tls.h | 5 +++++
net/tls/tls_device.c | 6 ++++++
net/tls/tls_main.c | 2 ++
net/tls/tls_sw.c | 21 +++++++--------------
4 files changed, 20 insertions(+), 14 deletions(-)

--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -40,6 +40,7 @@
#include <linux/socket.h>
#include <linux/tcp.h>
#include <linux/skmsg.h>
+#include <linux/mutex.h>
#include <linux/netdevice.h>

#include <net/tcp.h>
@@ -268,6 +269,10 @@ struct tls_context {

bool in_tcp_sendpages;
bool pending_open_record_frags;
+
+ struct mutex tx_lock; /* protects partially_sent_* fields and
+ * per-type TX fields
+ */
unsigned long flags;

/* cache cold stuff */
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -482,8 +482,10 @@ last_record:
int tls_device_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
{
unsigned char record_type = TLS_RECORD_TYPE_DATA;
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
int rc;

+ mutex_lock(&tls_ctx->tx_lock);
lock_sock(sk);

if (unlikely(msg->msg_controllen)) {
@@ -497,12 +499,14 @@ int tls_device_sendmsg(struct sock *sk,

out:
release_sock(sk);
+ mutex_unlock(&tls_ctx->tx_lock);
return rc;
}

int tls_device_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags)
{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
struct iov_iter msg_iter;
char *kaddr = kmap(page);
struct kvec iov;
@@ -511,6 +515,7 @@ int tls_device_sendpage(struct sock *sk,
if (flags & MSG_SENDPAGE_NOTLAST)
flags |= MSG_MORE;

+ mutex_lock(&tls_ctx->tx_lock);
lock_sock(sk);

if (flags & MSG_OOB) {
@@ -527,6 +532,7 @@ int tls_device_sendpage(struct sock *sk,

out:
release_sock(sk);
+ mutex_unlock(&tls_ctx->tx_lock);
return rc;
}

--- a/net/tls/tls_main.c
+++ b/net/tls/tls_main.c
@@ -258,6 +258,7 @@ void tls_ctx_free(struct tls_context *ct

memzero_explicit(&ctx->crypto_send, sizeof(ctx->crypto_send));
memzero_explicit(&ctx->crypto_recv, sizeof(ctx->crypto_recv));
+ mutex_destroy(&ctx->tx_lock);
kfree(ctx);
}

@@ -615,6 +616,7 @@ static struct tls_context *create_ctx(st
ctx->getsockopt = sk->sk_prot->getsockopt;
ctx->sk_proto_close = sk->sk_prot->close;
ctx->unhash = sk->sk_prot->unhash;
+ mutex_init(&ctx->tx_lock);
return ctx;
}

--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -897,15 +897,9 @@ int tls_sw_sendmsg(struct sock *sk, stru
if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
return -ENOTSUPP;

+ mutex_lock(&tls_ctx->tx_lock);
lock_sock(sk);

- /* Wait till there is any pending write on socket */
- if (unlikely(sk->sk_write_pending)) {
- ret = wait_on_pending_writer(sk, &timeo);
- if (unlikely(ret))
- goto send_end;
- }
-
if (unlikely(msg->msg_controllen)) {
ret = tls_proccess_cmsg(sk, msg, &record_type);
if (ret) {
@@ -1091,6 +1085,7 @@ send_end:
ret = sk_stream_error(sk, msg->msg_flags, ret);

release_sock(sk);
+ mutex_unlock(&tls_ctx->tx_lock);
return copied ? copied : ret;
}

@@ -1114,13 +1109,6 @@ static int tls_sw_do_sendpage(struct soc
eor = !(flags & (MSG_MORE | MSG_SENDPAGE_NOTLAST));
sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);

- /* Wait till there is any pending write on socket */
- if (unlikely(sk->sk_write_pending)) {
- ret = wait_on_pending_writer(sk, &timeo);
- if (unlikely(ret))
- goto sendpage_end;
- }
-
/* Call the sk_stream functions to manage the sndbuf mem. */
while (size > 0) {
size_t copy, required_size;
@@ -1219,15 +1207,18 @@ sendpage_end:
int tls_sw_sendpage(struct sock *sk, struct page *page,
int offset, size_t size, int flags)
{
+ struct tls_context *tls_ctx = tls_get_ctx(sk);
int ret;

if (flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL |
MSG_SENDPAGE_NOTLAST | MSG_SENDPAGE_NOPOLICY))
return -ENOTSUPP;

+ mutex_lock(&tls_ctx->tx_lock);
lock_sock(sk);
ret = tls_sw_do_sendpage(sk, page, offset, size, flags);
release_sock(sk);
+ mutex_unlock(&tls_ctx->tx_lock);
return ret;
}

@@ -2172,9 +2163,11 @@ static void tx_work_handler(struct work_

if (!test_and_clear_bit(BIT_TX_SCHEDULED, &ctx->tx_bitmask))
return;
+ mutex_lock(&tls_ctx->tx_lock);
lock_sock(sk);
tls_tx_records(sk, -1);
release_sock(sk);
+ mutex_unlock(&tls_ctx->tx_lock);
}

void tls_sw_write_space(struct sock *sk, struct tls_context *ctx)