Re: [PATCH v5 07/11] PM: hibernate: Add kernel-based encryption

From: Eric Biggers
Date: Sun Nov 13 2022 - 17:55:29 EST


On Fri, Nov 11, 2022 at 03:16:32PM -0800, Evan Green wrote:
> Enabling the kernel to be able to do encryption and integrity checks on
> the hibernate image prevents a malicious userspace from escalating to
> kernel execution via hibernation resume. As a first step toward this, add
> the scaffolding needed for the kernel to do AEAD encryption on the
> hibernate image, giving us both secrecy and integrity.

"Authenticity" would be more accurate than "integrity".

It is a common mistake, though...

> +#define USWSUSP_KEY_NONCE_SIZE 16
> +
> +/*
> + * This structure is used to pass the kernel's hibernate encryption key in
> + * either direction.
> + */
> +struct uswsusp_key_blob {
> + __u32 blob_len;
> + __u8 blob[512];
> + __u8 nonce[USWSUSP_KEY_NONCE_SIZE];
> +} __attribute__((packed));

What is the purpose of the nonce here?

It seems to be associated with the key? That doesn't make sense.

Also, please don't use __attribute__((packed)) on structures unless a specific
byte layout is needed (e.g. due to the struct being stored on-disk).

> +config ENCRYPTED_HIBERNATION
> + bool "Encryption support for userspace snapshots"
> + depends on HIBERNATION_SNAPSHOT_DEV
> + depends on CRYPTO_AEAD2=y

"gcm(aes)" from the crypto API is being used, so CRYPTO_GCM and CRYPTO_AES are
needed to ensure that it is available.

> +/* Encrypt more data from the snapshot into the staging area. */
> +static int snapshot_encrypt_refill(struct snapshot_data *data)
> +{
> + struct aead_request *req = data->aead_req;
> + u8 nonce[GCM_AES_IV_SIZE];
> + DECLARE_CRYPTO_WAIT(wait);
> + size_t total = 0;
> + int pg_idx;
> + int res;
> +
> + /*
> + * The first buffer is the associated data, set to the offset to prevent
> + * attacks that rearrange chunks.
> + */
> + sg_set_buf(&data->sg[0], &data->crypt_total, sizeof(data->crypt_total));

Wouldn't it be simpler to use implicit nonces? I.e. make each nonce be the
offset of the page, instead of a value stored alongside the ciphertext? Then
there would be no need to include the offset of the page in the AAD.

> + /*
> + * Copy the page into the staging area. A future optimization
> + * could potentially skip this copy for lowmem pages.
> + */
> + memcpy(buf, data_of(data->handle), PAGE_SIZE);
> + sg_set_buf(&data->sg[1 + pg_idx], buf, PAGE_SIZE);
> + total += PAGE_SIZE;

This is a bit ugly. It means that highmem pages in the snapshot get copied
twice, first from the snapshot page to the static variable 'buffer', and then to
a page in snapshot_data::crypt_pages. And lowmem pages in the snapshot get
copied once, into snapshot_data::crypt_pages.

It should be possible to encrypt directly from the snapshot page in both cases,
saving all the copies.

I suppose it is fine as-is for now, but I wonder if there's a clean way to
implement the zero-copy method.

> + }
> +
> + sg_set_buf(&data->sg[1 + pg_idx], &data->auth_tag, SNAPSHOT_AUTH_TAG_SIZE);
> + aead_request_set_callback(req, 0, crypto_req_done, &wait);
> + /*
> + * Use incrementing nonces for each chunk, since a 64 bit value won't
> + * roll into re-use for any given hibernate image.
> + */
> + memcpy(&nonce[0], &data->nonce_low, sizeof(data->nonce_low));
> + memcpy(&nonce[sizeof(data->nonce_low)],
> + &data->nonce_high,
> + sizeof(nonce) - sizeof(data->nonce_low));
> + data->nonce_low += 1;

Hmm, so you *are* using incrementing nonces. In that case there's no need to
store them alongside the ciphertext or to include them in the AAD.

Also, since it's being assumed that the counter doesn't overflow 64 bits, there
should be no need for 'nonce_high' to exist.

> + /* Set up the encryption transform */
> + data->aead_tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
> + if (IS_ERR(data->aead_tfm)) {
> + rc = PTR_ERR(data->aead_tfm);
> + data->aead_tfm = NULL;
> + return rc;
> + }
> +
> + rc = -ENOMEM;
> + data->aead_req = aead_request_alloc(data->aead_tfm, GFP_KERNEL);
> + if (data->aead_req == NULL)
> + goto setup_fail;
> +
> + /* Allocate the staging area */
> + for (i = 0; i < CHUNK_SIZE; i++) {
> + data->crypt_pages[i] = (void *)__get_free_page(GFP_ATOMIC);
> + if (data->crypt_pages[i] == NULL)
> + goto setup_fail;
> + }

The GFP_ATOMIC above should be GFP_KERNEL, given that the same function uses
GFP_KERNEL already.

> +int snapshot_get_encryption_key(struct snapshot_data *data,
> + struct uswsusp_key_blob __user *key)
> +{
> + u8 aead_key[SNAPSHOT_ENCRYPTION_KEY_SIZE];
> + u8 nonce[USWSUSP_KEY_NONCE_SIZE];
> + int rc;
> +
> + /* Don't pull a random key from a world that can be reset. */
> + if (data->ready)
> + return -EPIPE;
> +
> + rc = snapshot_setup_encryption_common(data);
> + if (rc)
> + return rc;
> +
> + /* Build a random starting nonce. */
> + get_random_bytes(nonce, sizeof(nonce));
> + memcpy(&data->nonce_low, &nonce[0], sizeof(data->nonce_low));
> + memcpy(&data->nonce_high, &nonce[8], sizeof(data->nonce_high));
> + /* Build a random key */
> + get_random_bytes(aead_key, sizeof(aead_key));
> + rc = crypto_aead_setkey(data->aead_tfm, aead_key, sizeof(aead_key));
> + if (rc)
> + goto fail;

Why not just start the nonce at 0? It's a new key.

> + rc = copy_to_user(&key->blob, &aead_key, sizeof(aead_key));
> + if (rc)
> + goto fail;
> +
> + rc = copy_to_user(&key->nonce, &nonce, sizeof(nonce));
> + if (rc)
> + goto fail;

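This is not handling the return value of copy_to_user() correctly. It returns
the number of bytes that could not be copied, not a negative errno, so a nonzero
result needs to be converted to -EFAULT.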

> +int snapshot_set_encryption_key(struct snapshot_data *data,
> + struct uswsusp_key_blob __user *key)
> +{
> + struct uswsusp_key_blob blob;
> + int rc;
> +
> + /* It's too late if data's been pushed in. */
> + if (data->handle.cur)
> + return -EPIPE;
> +
> + rc = snapshot_setup_encryption_common(data);
> + if (rc)
> + return rc;
> +
> + /* Load the key from user mode. */
> + rc = copy_from_user(&blob, key, sizeof(struct uswsusp_key_blob));
> + if (rc)
> + goto crypto_setup_fail;

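Likewise, this is not handling the return value of copy_from_user() correctly:
it returns the number of bytes left uncopied, not a negative errno, so a nonzero
result needs to be converted to -EFAULT.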

> +
> + if (blob.blob_len != sizeof(struct uswsusp_key_blob)) {
> + rc = -EINVAL;
> + goto crypto_setup_fail;
> + }

Later in the patch series this changes to:

if (blob.blob_len > sizeof(key->blob))

So the semantics of the blob_len field changes from "size of the struct" to
"size of a field in the struct". That's confusing. Which one did you intend?

> +loff_t snapshot_get_encrypted_image_size(loff_t raw_size)
> +{
> + loff_t pages = raw_size >> PAGE_SHIFT;
> + loff_t chunks = (pages + (CHUNK_SIZE - 1)) / CHUNK_SIZE;

Use DIV_ROUND_UP(pages, CHUNK_SIZE) here instead of open-coding the round-up.

> + /*
> + * The encrypted size is the normal size, plus a stitched in
> + * authentication tag for every chunk of pages.
> + */
> + return raw_size + (chunks * SNAPSHOT_AUTH_TAG_SIZE);
> +}
> +
> +int snapshot_finalize_decrypted_image(struct snapshot_data *data)
> +{
> + int rc;
> +
> + if (data->crypt_offset != 0) {
> + rc = snapshot_decrypt_drain(data);
> + if (rc)
> + return rc;
> + }
> +
> + return 0;

There's no need for the 'rc' variable:

if (data->crypt_offset != 0)
return snapshot_decrypt_drain(data);
return 0;

> diff --git a/kernel/power/user.c b/kernel/power/user.c
> index 3a4e70366f354c..bba5cdbd2c0239 100644
> --- a/kernel/power/user.c
> +++ b/kernel/power/user.c
> @@ -25,19 +25,10 @@
> #include <linux/uaccess.h>
>
> #include "power.h"
> +#include "user.h"
>
> static bool need_wait;
> -
> -static struct snapshot_data {
> - struct snapshot_handle handle;
> - int swap;
> - int mode;
> - bool frozen;
> - bool ready;
> - bool platform_support;
> - bool free_bitmaps;
> - dev_t dev;
> -} snapshot_state;
> +struct snapshot_data snapshot_state;

Why is this variable made non-static? It's still only used in this file.

> diff --git a/kernel/power/user.h b/kernel/power/user.h
> new file mode 100644
> index 00000000000000..ac429782abff85
> --- /dev/null
> +++ b/kernel/power/user.h
> @@ -0,0 +1,103 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#include <linux/crypto.h>
> +#include <crypto/aead.h>
> +#include <crypto/aes.h>
> +
> +#define SNAPSHOT_ENCRYPTION_KEY_SIZE AES_KEYSIZE_128

Why use AES-128 instead of AES-256?

> +#if defined(CONFIG_ENCRYPTED_HIBERNATION)

This can simply be written as:

#ifdef CONFIG_ENCRYPTED_HIBERNATION

> +#else
> +
> +ssize_t snapshot_read_encrypted(struct snapshot_data *data,
> + char __user *buf, size_t count, loff_t *offp)
> +{
> + return -ENOTTY;
> +}
> +
> +ssize_t snapshot_write_encrypted(struct snapshot_data *data,
> + const char __user *buf, size_t count,
> + loff_t *offp)
> +{
> + return -ENOTTY;
> +}
> +
> +static void snapshot_teardown_encryption(struct snapshot_data *data) {}
> +static int snapshot_get_encryption_key(struct snapshot_data *data,
> + struct uswsusp_key_blob __user *key)
> +{
> + return -ENOTTY;
> +}
> +
> +static int snapshot_set_encryption_key(struct snapshot_data *data,
> + struct uswsusp_key_blob __user *key)
> +{
> + return -ENOTTY;
> +}
> +
> +static loff_t snapshot_get_encrypted_image_size(loff_t raw_size)
> +{
> + return raw_size;
> +}
> +
> +static int snapshot_finalize_decrypted_image(struct snapshot_data *data)
> +{
> + return -ENOTTY;
> +}
> +
> +#define snapshot_encryption_enabled(data) (0)

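The above stubs should be 'static inline' functions. As written, the non-static
ones are full definitions in a header (duplicate symbols if it is ever included
from more than one file), and the plain 'static' ones can trigger unused-function
warnings in any file that includes the header without calling them.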

- Eric