Re: [PATCH 04/12] __wr_after_init: x86_64: __wr_op

From: Thiago Jung Bauermann
Date: Thu Dec 20 2018 - 12:20:49 EST



Hello Igor,

> +/*
> + * The following two variables are statically allocated by the linker
> + * script at the the boundaries of the memory region (rounded up to
> + * multiples of PAGE_SIZE) reserved for __wr_after_init.
> + */
> +extern long __start_wr_after_init;
> +extern long __end_wr_after_init;
> +
> +static inline bool is_wr_after_init(unsigned long ptr, __kernel_size_t size)
> +{
> + unsigned long start = (unsigned long)&__start_wr_after_init;
> + unsigned long end = (unsigned long)&__end_wr_after_init;
> + unsigned long low = ptr;
> + unsigned long high = ptr + size;
> +
> + return likely(start <= low && low <= high && high <= end);
> +}
> +
> +void *__wr_op(unsigned long dst, unsigned long src, __kernel_size_t len,
> + enum wr_op_type op)
> +{
> + temporary_mm_state_t prev;
> + unsigned long offset;
> + unsigned long wr_poking_addr;
> +
> + /* Confirm that the writable mapping exists. */
> + if (WARN_ONCE(!wr_ready, "No writable mapping available"))
> + return (void *)dst;
> +
> + if (WARN_ONCE(op >= WR_OPS_NUMBER, "Invalid WR operation.") ||
> + WARN_ONCE(!is_wr_after_init(dst, len), "Invalid WR range."))
> + return (void *)dst;
> +
> + offset = dst - (unsigned long)&__start_wr_after_init;
> + wr_poking_addr = wr_poking_base + offset;
> + local_irq_disable();
> + prev = use_temporary_mm(wr_poking_mm);
> +
> + if (op == WR_MEMCPY)
> + copy_to_user((void __user *)wr_poking_addr, (void *)src, len);
> + else if (op == WR_MEMSET)
> + memset_user((void __user *)wr_poking_addr, (u8)src, len);
> +
> + unuse_temporary_mm(prev);
> + local_irq_enable();
> + return (void *)dst;
> +}

There's a lot of casting back and forth between unsigned long and void *
(also in the previous patch). Is there a reason for that? My impression
is that there would be less casts if variables holding addresses were
declared as void * in the first place. In that case, it wouldn't hurt to
have an additional argument in __rw_op() to carry the byte value for the
WR_MEMSET operation.

> +
> +#define TB (1UL << 40)
> +
> +struct mm_struct *copy_init_mm(void);
> +void __init wr_poking_init(void)
> +{
> + unsigned long start = (unsigned long)&__start_wr_after_init;
> + unsigned long end = (unsigned long)&__end_wr_after_init;
> + unsigned long i;
> + unsigned long wr_range;
> +
> + wr_poking_mm = copy_init_mm();
> + if (WARN_ONCE(!wr_poking_mm, "No alternate mapping available."))
> + return;
> +
> + wr_range = round_up(end - start, PAGE_SIZE);
> +
> + /* Randomize the poking address base*/
> + wr_poking_base = TASK_UNMAPPED_BASE +
> + (kaslr_get_random_long("Write Rare Poking") & PAGE_MASK) %
> + (TASK_SIZE - (TASK_UNMAPPED_BASE + wr_range));
> +
> + /*
> + * Place 64TB of kernel address space within 128TB of user address
> + * space, at a random page aligned offset.
> + */
> + wr_poking_base = (((unsigned long)kaslr_get_random_long("WR Poke")) &
> + PAGE_MASK) % (64 * _BITUL(40));

You're setting wr_poking_base twice in a row? Is this an artifact from
rebase?

--
Thiago Jung Bauermann
IBM Linux Technology Center