[patch 7/8] Immediate Value - PowerPC Optimization
From: Mathieu Desnoyers
Date: Fri Jul 13 2007 - 21:30:34 EST
PowerPC optimization of the immediate values which uses a li instruction,
patched with an immediate value.
Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@xxxxxxxxxx>
---
arch/powerpc/kernel/Makefile | 1
arch/powerpc/kernel/immediate.c | 98 ++++++++++++++++++++++++++++++++++++
include/asm-powerpc/immediate.h | 107 +++++++++++++++++++++++++++++++++++++++-
3 files changed, 205 insertions(+), 1 deletion(-)
Index: linux-2.6-lttng/include/asm-powerpc/immediate.h
===================================================================
--- linux-2.6-lttng.orig/include/asm-powerpc/immediate.h 2007-07-13 19:26:18.000000000 -0400
+++ linux-2.6-lttng/include/asm-powerpc/immediate.h 2007-07-13 19:28:49.000000000 -0400
@@ -1 +1,106 @@
-#include <asm-generic/immediate.h>
+#ifndef _ASM_POWERPC_IMMEDIATE_H
+#define _ASM_POWERPC_IMMEDIATE_H
+
+/*
+ * Immediate values. PowerPC architecture optimizations.
+ *
+ * (C) Copyright 2006 Mathieu Desnoyers <mathieu.desnoyers@xxxxxxxxxx>
+ *
+ * This file is released under the GPLv2.
+ * See the file COPYING for more details.
+ */
+
+#include <asm/asm-compat.h>
+
+struct module;
+
+struct __immediate {
+ long var; /* Identifier variable of the immediate value */
+ long immediate; /*
+ * Pointer to the memory location that holds
+ * the immediate value within the load immediate
+ * instruction.
+ */
+ long size; /* Type size. */
+};
+
+/*
+ * Optimized version of the immediate.
+ * Make sure the 2 bytes update will be atomic by aligning the immediate
+ * value.
+ * Use a normal memory read for the 4 bytes immediate because there is no way to
+ * atomically update it without using a seqlock read side, which would cost more
+ * in term of total i-cache and d-cache space than a simple memory read.
+ * Do not use in __init and __exit functions. Use _immediate_read() instead.
+ */
+#define immediate_read(var) \
+ ({ \
+ __typeof__((var)->value) value; \
+ switch (sizeof(value)) { \
+ case 1: \
+ asm ( ".section __immediate, \"a\", @progbits;\n\t" \
+ PPC_LONG "%1, ((0f)+3), 1;\n\t" \
+ ".previous;\n\t" \
+ "0:\n\t" \
+ "li %0,%2;\n\t" \
+ : "=r" (value) \
+ : "i" (&(var)->value), \
+ "i" (0)); \
+ break; \
+ case 2: \
+ asm ( ".section __immediate, \"a\", @progbits;\n\t" \
+ PPC_LONG "%1, ((0f)+2), 2;\n\t" \
+ ".previous;\n\t" \
+ ".align 2\n\t" \
+ "0:\n\t" \
+ "li %0,%2;\n\t" \
+ : "=r" (value) \
+ : "i" (&(var)->value), \
+ "i" (0)); \
+ break; \
+ default: \
+ value = (var)->value; \
+ break; \
+ }; \
+ value; \
+ })
+
+/*
+ * Update immediate value, can take module mutex.
+ */
+#define immediate_set(var, i) \
+ (var)->value = (i); \
+ immediate_update(1);
+
+/*
+ * Update immediate value. Module mutex must already be taken.
+ */
+#define _immediate_set(var, i) \
+ (var)->value = (i); \
+ immediate_update(0);
+
+/*
+ * Update immediate value at early boot.
+ */
+#define immediate_set_early(var, i) \
+ (var)->value = (i); \
+ immediate_update_early();
+
+/*
+ * Branch depending on an immediate value. Could eventually be optimized further
+ * by improving gcc to give the ability to patch a jump instruction instead of
+ * the value it depends on.
+ * Do not use in __init and __exit functions. Use _immediate_if() instead.
+ */
+#define immediate_if(var) if (unlikely(immediate_read(var)))
+
+/*
+ * Used internally.
+ */
+extern void immediate_update(int lock);
+extern void module_immediate_setup(struct module *mod);
+extern void immediate_update_early(void);
+extern int arch_immediate_update(const struct __immediate *immediate);
+extern void arch_immediate_update_early(const struct __immediate *immediate);
+
+#endif /* _ASM_POWERPC_IMMEDIATE_H */
Index: linux-2.6-lttng/arch/powerpc/kernel/Makefile
===================================================================
--- linux-2.6-lttng.orig/arch/powerpc/kernel/Makefile 2007-07-13 19:26:18.000000000 -0400
+++ linux-2.6-lttng/arch/powerpc/kernel/Makefile 2007-07-13 19:28:29.000000000 -0400
@@ -103,3 +103,4 @@
extra-$(CONFIG_PPC_FPU) += fpu.o
extra-$(CONFIG_PPC64) += entry_64.o
+obj-$(CONFIG_IMMEDIATE) += immediate.o
Index: linux-2.6-lttng/arch/powerpc/kernel/immediate.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6-lttng/arch/powerpc/kernel/immediate.c 2007-07-13 19:28:29.000000000 -0400
@@ -0,0 +1,98 @@
+/*
+ * Powerpc optimized immediate values enabling/disabling.
+ *
+ * Mathieu Desnoyers <mathieu.desnoyers@xxxxxxxxxx>
+ */
+
+#include <linux/module.h>
+#include <linux/immediate.h>
+#include <linux/string.h>
+#include <linux/kprobes.h>
+#include <asm/cacheflush.h>
+#include <asm/page.h>
+
+#define LI_OPCODE_LEN 2
+
+/*
+ * The immediate value are aligned.
+ */
+int arch_immediate_update(const struct __immediate *immediate)
+{
+#ifdef CONFIG_KPROBES
+ kprobe_opcode_t *insn;
+ /*
+ * Fail if a kprobe has been set on this instruction.
+ * (TODO: we could eventually do better and modify all the (possibly
+ * nested) kprobes for this site if kprobes had an API for this.
+ */
+ switch (immediate->size) {
+ case 1: /* The uint8_t points to the 3rd byte of the
+ * instruction */
+ insn = (void*)(immediate->immediate - 1 - LI_OPCODE_LEN);
+ break;
+ case 2: insn = (void*)(immediate->immediate - LI_OPCODE_LEN);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (unlikely(*insn == BREAKPOINT_INSTRUCTION)) {
+ printk(KERN_WARNING "Immediate value in conflict with kprobe. "
+ "Variable at %p, "
+ "instruction at %p, size %lu\n",
+ (void*)immediate->immediate,
+ (void*)immediate->var, immediate->size);
+ return -EBUSY;
+ }
+#endif
+
+ /*
+ * If the variable and the instruction have the same value, there is
+ * nothing to do.
+ */
+ switch (immediate->size) {
+ case 1: if (*(uint8_t*)immediate->immediate
+ == *(uint8_t*)immediate->var)
+ return 0;
+ break;
+ case 2: if (*(uint16_t*)immediate->immediate
+ == *(uint16_t*)immediate->var)
+ return 0;
+ break;
+ default:return -EINVAL;
+ }
+ memcpy((void*)immediate->immediate, (void*)immediate->var,
+ immediate->size);
+ flush_icache_range((unsigned long)immediate->immediate,
+ immediate->size);
+ return 0;
+}
+
+/*
+ * Very early initialization of the in-core immediate values.
+ * We can use flush_icache_range, since the cpu identification has been done in
+ * the early_init stage.
+ */
+void __init arch_immediate_update_early(const struct __immediate *immediate)
+{
+ /*
+ * If the variable and the instruction have the same value, there is
+ * nothing to do.
+ */
+ switch (immediate->size) {
+ case 1: if (*(uint8_t*)immediate->immediate
+ == *(uint8_t*)immediate->var)
+ return;
+ break;
+ case 2: if (*(uint16_t*)immediate->immediate
+ == *(uint16_t*)immediate->var)
+ return;
+ break;
+ default:return;
+ }
+ memcpy((void*)immediate->immediate, (void*)immediate->var,
+ immediate->size);
+ flush_icache_range((unsigned long)immediate->immediate,
+ immediate->size);
+}
+
--
Mathieu Desnoyers
Computer Engineering Ph.D. Student, Ecole Polytechnique de Montreal
OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/