Re: [RFC Part1 PATCH v3 13/17] x86/io: Unroll string I/O when SEV is active

From: H. Peter Anvin
Date: Wed Jul 26 2017 - 15:46:24 EST


<ard.biesheuvel@xxxxxxxxxx>,Eric Biederman <ebiederm@xxxxxxxxxxxx>,Tejun Heo <tj@xxxxxxxxxx>,Paolo Bonzini <pbonzini@xxxxxxxxxx>,Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>,"Kirill A . Shutemov" <kirill.shutemov@xxxxxxxxxxxxxxx>,Lu Baolu <baolu.lu@xxxxxxxxxxxxxxx>
From: hpa@xxxxxxxxx
Message-ID: <AF533772-BD88-4EDA-AD26-7EFA2878F123@xxxxxxxxx>

On July 26, 2017 9:24:45 PM GMT+02:00, Brijesh Singh <brijesh.singh@xxxxxxx> wrote:
>
>Hi Arnd and David,
>
>On 07/26/2017 05:45 AM, Arnd Bergmann wrote:
>> On Tue, Jul 25, 2017 at 11:51 AM, David Laight
><David.Laight@xxxxxxxxxx> wrote:
>>> From: Brijesh Singh
>>>> Sent: 24 July 2017 20:08
>>>> From: Tom Lendacky <thomas.lendacky@xxxxxxx>
>>>>
>>>> Secure Encrypted Virtualization (SEV) does not support string I/O,
>so
>>>> unroll the string I/O operation into a loop operating on one
>element at
>>>> a time.
>>>>
>>>> Signed-off-by: Tom Lendacky <thomas.lendacky@xxxxxxx>
>>>> Signed-off-by: Brijesh Singh <brijesh.singh@xxxxxxx>
>>>> ---
>>>> arch/x86/include/asm/io.h | 26 ++++++++++++++++++++++----
>>>> 1 file changed, 22 insertions(+), 4 deletions(-)
>>>>
>>>> diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
>>>> index e080a39..2f3c002 100644
>>>> --- a/arch/x86/include/asm/io.h
>>>> +++ b/arch/x86/include/asm/io.h
>>>> @@ -327,14 +327,32 @@ static inline unsigned type in##bwl##_p(int
>port) \
>>>>
> \
>>>> static inline void outs##bwl(int port, const void *addr, unsigned
>long count) \
>>>> {
>>
>> This will clash with a fix I did to add a "memory" clobber
>> for the traditional implementation, see
>> https://patchwork.kernel.org/patch/9854573/
>>
>>> Is it even worth leaving these as inline functions?
>>> Given the speed of IO cycles it is unlikely that the cost of calling
>a real
>>> function will be significant.
>>> The code bloat reduction will be significant.
>>
>> I think the smallest code would be the original "rep insb" etc, which
>> should be smaller than a function call, unlike the loop. Then again,
>> there is a rather small number of affected device drivers, almost all
>> of them for ancient hardware that you won't even build in a 64-bit
>> x86 kernel, see the list below. The only user I found that is
>actually
>> still relevant is drivers/tty/hvc/hvc_xen.c, which uses it for the
>early
>> console.
>
>
>There are some indirect user of string I/O functions. The following
>functions
>defined in lib/iomap.c calls rep version of ins and outs.
>
>- ioread8_rep, ioread16_rep, ioread32_rep
>- iowrite8_rep, iowrite16_rep, iowrite32_rep
>
>I found that several drivers use above functions.
>
>Here is one approach to convert it into non-inline functions. In this
>approach,
>I have added a new file arch/x86/kernel/io.c which provides non rep
>version of
>string I/O routines. The file gets built and used only when
>AMD_MEM_ENCRYPT is
>enabled. On positive side, if we don't build kernel with
>AMD_MEM_ENCRYPT support
>then we use inline routines, when AMD_MEM_ENCRYPT is built then we make
>a function
>call. Inside the function we unroll only when SEV is active.
>
>Do you see any issue with this approach ? thanks
>
>diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
>index e080a39..104927d 100644
>--- a/arch/x86/include/asm/io.h
>+++ b/arch/x86/include/asm/io.h
>@@ -323,8 +323,9 @@ static inline unsigned type in##bwl##_p(int port)
> \
> unsigned type value = in##bwl(port); \
> slow_down_io(); \
> return value; \
>-}
>\
>-
>\
>+}
>+
>+#define BUILDIO_REP(bwl, bw, type)
>\
>static inline void outs##bwl(int port, const void *addr, unsigned long
>count) \
>{
>\
> asm volatile("rep; outs" #bwl \
>@@ -335,12 +336,31 @@ static inline void ins##bwl(int port, void *addr,
>unsigned long count) \
>{
>\
> asm volatile("rep; ins" #bwl \
> : "+D"(addr), "+c"(count) : "d"(port)); \
>-}
>+}
>\
>
> BUILDIO(b, b, char)
> BUILDIO(w, w, short)
> BUILDIO(l, , int)
>
>+#ifdef CONFIG_AMD_MEM_ENCRYPT
>+extern void outsb_try_rep(int port, const void *addr, unsigned long
>count);
>+extern void insb_try_rep(int port, void *addr, unsigned long count);
>+extern void outsw_try_rep(int port, const void *addr, unsigned long
>count);
>+extern void insw_try_rep(int port, void *addr, unsigned long count);
>+extern void outsl_try_rep(int port, const void *addr, unsigned long
>count);
>+extern void insl_try_rep(int port, void *addr, unsigned long count);
>+#define outsb outsb_try_rep
>+#define insb insb_try_rep
>+#define outsw outsw_try_rep
>+#define insw insw_try_rep
>+#define outsl outsl_try_rep
>+#define insl insl_try_rep
>+#else
>+BUILDIO_REP(b, b, char)
>+BUILDIO_REP(w, w, short)
>+BUILDIO_REP(l, , int)
>+#endif
>+
> extern void *xlate_dev_mem_ptr(phys_addr_t phys);
> extern void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);
>
>diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
>index a01892b..3b6e2a3 100644
>--- a/arch/x86/kernel/Makefile
>+++ b/arch/x86/kernel/Makefile
>@@ -42,6 +42,7 @@ CFLAGS_irq.o := -I$(src)/../include/asm/trace
>
> obj-y := process_$(BITS).o signal.o
> obj-$(CONFIG_COMPAT) += signal_compat.o
>+obj-$(CONFIG_AMD_MEM_ENCRYPT) += io.o
>obj-y += traps.o irq.o irq_$(BITS).o
>dumpstack_$(BITS).o
> obj-y += time.o ioport.o dumpstack.o nmi.o
> obj-$(CONFIG_MODIFY_LDT_SYSCALL) += ldt.o
>diff --git a/arch/x86/kernel/io.c b/arch/x86/kernel/io.c
>new file mode 100644
>index 0000000..f58afa9
>--- /dev/null
>+++ b/arch/x86/kernel/io.c
>@@ -0,0 +1,87 @@
>+#include <linux/types.h>
>+#include <linux/io.h>
>+#include <asm/io.h>
>+
>+void outsb_try_rep(int port, const void *addr, unsigned long count)
>+{
>+ if (sev_active()) {
>+ unsigned char *value = (unsigned char *)addr;
>+ while (count) {
>+ outb(*value, port);
>+ value++;
>+ count--;
>+ }
>+ } else {
>+ asm volatile("rep; outsb" : "+S"(addr), "+c"(count) :
>"d"(port));
>+ }
>+}
>+
>+void insb_try_rep(int port, void *addr, unsigned long count)
>+{
>+ if (sev_active()) {
>+ unsigned char *value = (unsigned char *)addr;
>+ while (count) {
>+ *value = inb(port);
>+ value++;
>+ count--;
>+ }
>+ } else {
>+ asm volatile("rep; insb" : "+D"(addr), "+c"(count) :
>"d"(port));
>+ }
>+}
>+
>+void outsw_try_rep(int port, const void *addr, unsigned long count)
>+{
>+ if (sev_active()) {
>+ unsigned short *value = (unsigned short *)addr;
>+ while (count) {
>+ outw(*value, port);
>+ value++;
>+ count--;
>+ }
>+ } else {
>+ asm volatile("rep; outsw" : "+S"(addr), "+c"(count) :
>"d"(port));
>+ }
>+}
>+void insw_try_rep(int port, void *addr, unsigned long count)
>+{
>+ if (sev_active()) {
>+ unsigned short *value = (unsigned short *)addr;
>+ while (count) {
>+ *value = inw(port);
>+ value++;
>+ count--;
>+ }
>+ } else {
>+ asm volatile("rep; insw" : "+D"(addr), "+c"(count) :
>"d"(port));
>+ }
>+}
>+
>+void outsl_try_rep(int port, const void *addr, unsigned long count)
>+{
>+ if (sev_active()) {
>+ unsigned int *value = (unsigned int *)addr;
>+ while (count) {
>+ outl(*value, port);
>+ value++;
>+ count--;
>+ }
>+ } else {
>+ asm volatile("rep; outsl" : "+S"(addr), "+c"(count) :
>"d"(port));
>+ }
>+}
>+
>+void insl_try_rep(int port, void *addr, unsigned long count)
>+{
>+ if (sev_active()) {
>+ unsigned int *value = (unsigned int *)addr;
>+ while (count) {
>+ *value = inl(port);
>+ value++;
>+ count--;
>+ }
>+ } else {
>+ asm volatile("rep; insl" : "+D"(addr), "+c"(count) :
>"d"(port));
>+ }
>+}

What the heck?
--
Sent from my Android device with K-9 Mail. Please excuse my brevity.