x86/paravirt: selectively save/restore regs around pvops calls
Jeremy Fitzhardinge [Wed, 28 Jan 2009 22:35:04 +0000 (14:35 -0800)]
Impact: Optimization

Each asm paravirt-ops call site declares which registers it is allowed
to clobber.  This patch uses that information to save/restore only the
registers that are not in the clobber set around each pvops call,
instead of unconditionally saving all of them.  In many cases this
significantly shrinks code size.

Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

arch/x86/include/asm/paravirt.h

index 961d10c..dcce961 100644 (file)
 #define CLBR_EAX  (1 << 0)
 #define CLBR_ECX  (1 << 1)
 #define CLBR_EDX  (1 << 2)
+#define CLBR_EDI  (1 << 3)
 
-#ifdef CONFIG_X86_64
-#define CLBR_RSI  (1 << 3)
-#define CLBR_RDI  (1 << 4)
+#ifdef CONFIG_X86_32
+/* CLBR_ANY should match all regs platform has. For i386, that's just it */
+#define CLBR_ANY  ((1 << 4) - 1)
+#else
+#define CLBR_RAX  CLBR_EAX
+#define CLBR_RCX  CLBR_ECX
+#define CLBR_RDX  CLBR_EDX
+#define CLBR_RDI  CLBR_EDI
+#define CLBR_RSI  (1 << 4)
 #define CLBR_R8   (1 << 5)
 #define CLBR_R9   (1 << 6)
 #define CLBR_R10  (1 << 7)
 #define CLBR_R11  (1 << 8)
 #define CLBR_ANY  ((1 << 9) - 1)
+
+#define CLBR_ARG_REGS  (CLBR_RDI | CLBR_RSI | CLBR_RDX | \
+                        CLBR_RCX | CLBR_R8 | CLBR_R9)
+#define CLBR_RET_REG   (CLBR_RAX | CLBR_RDX)
+#define CLBR_SCRATCH   (CLBR_R10 | CLBR_R11)
+
 #include <asm/desc_defs.h>
-#else
-/* CLBR_ANY should match all regs platform has. For i386, that's just it */
-#define CLBR_ANY  ((1 << 3) - 1)
 #endif /* X86_64 */
 
 #ifndef __ASSEMBLY__
@@ -1530,33 +1540,49 @@ static inline unsigned long __raw_local_irq_save(void)
        .popsection
 
 
+#define COND_PUSH(set, mask, reg)                      \
+       .if ((~set) & mask); push %reg; .endif
+#define COND_POP(set, mask, reg)                       \
+       .if ((~set) & mask); pop %reg; .endif
+
 #ifdef CONFIG_X86_64
-#define PV_SAVE_REGS                           \
-       push %rax;                              \
-       push %rcx;                              \
-       push %rdx;                              \
-       push %rsi;                              \
-       push %rdi;                              \
-       push %r8;                               \
-       push %r9;                               \
-       push %r10;                              \
-       push %r11
-#define PV_RESTORE_REGS                                \
-       pop %r11;                               \
-       pop %r10;                               \
-       pop %r9;                                \
-       pop %r8;                                \
-       pop %rdi;                               \
-       pop %rsi;                               \
-       pop %rdx;                               \
-       pop %rcx;                               \
-       pop %rax
+
+#define PV_SAVE_REGS(set)                      \
+       COND_PUSH(set, CLBR_RAX, rax);          \
+       COND_PUSH(set, CLBR_RCX, rcx);          \
+       COND_PUSH(set, CLBR_RDX, rdx);          \
+       COND_PUSH(set, CLBR_RSI, rsi);          \
+       COND_PUSH(set, CLBR_RDI, rdi);          \
+       COND_PUSH(set, CLBR_R8, r8);            \
+       COND_PUSH(set, CLBR_R9, r9);            \
+       COND_PUSH(set, CLBR_R10, r10);          \
+       COND_PUSH(set, CLBR_R11, r11)
+#define PV_RESTORE_REGS(set)                   \
+       COND_POP(set, CLBR_R11, r11);           \
+       COND_POP(set, CLBR_R10, r10);           \
+       COND_POP(set, CLBR_R9, r9);             \
+       COND_POP(set, CLBR_R8, r8);             \
+       COND_POP(set, CLBR_RDI, rdi);           \
+       COND_POP(set, CLBR_RSI, rsi);           \
+       COND_POP(set, CLBR_RDX, rdx);           \
+       COND_POP(set, CLBR_RCX, rcx);           \
+       COND_POP(set, CLBR_RAX, rax)
+
 #define PARA_PATCH(struct, off)        ((PARAVIRT_PATCH_##struct + (off)) / 8)
 #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .quad, 8)
 #define PARA_INDIRECT(addr)    *addr(%rip)
 #else
-#define PV_SAVE_REGS   pushl %eax; pushl %edi; pushl %ecx; pushl %edx
-#define PV_RESTORE_REGS popl %edx; popl %ecx; popl %edi; popl %eax
+#define PV_SAVE_REGS(set)                      \
+       COND_PUSH(set, CLBR_EAX, eax);          \
+       COND_PUSH(set, CLBR_EDI, edi);          \
+       COND_PUSH(set, CLBR_ECX, ecx);          \
+       COND_PUSH(set, CLBR_EDX, edx)
+#define PV_RESTORE_REGS(set)                   \
+       COND_POP(set, CLBR_EDX, edx);           \
+       COND_POP(set, CLBR_ECX, ecx);           \
+       COND_POP(set, CLBR_EDI, edi);           \
+       COND_POP(set, CLBR_EAX, eax)
+
 #define PARA_PATCH(struct, off)        ((PARAVIRT_PATCH_##struct + (off)) / 4)
 #define PARA_SITE(ptype, clobbers, ops) _PVSITE(ptype, clobbers, ops, .long, 4)
 #define PARA_INDIRECT(addr)    *%cs:addr
@@ -1568,15 +1594,15 @@ static inline unsigned long __raw_local_irq_save(void)
 
 #define DISABLE_INTERRUPTS(clobbers)                                   \
        PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_disable), clobbers, \
-                 PV_SAVE_REGS;                                         \
+                 PV_SAVE_REGS(clobbers);                               \
                  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_disable);    \
-                 PV_RESTORE_REGS;)                     \
+                 PV_RESTORE_REGS(clobbers);)
 
 #define ENABLE_INTERRUPTS(clobbers)                                    \
        PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_irq_enable), clobbers,  \
-                 PV_SAVE_REGS;                                         \
+                 PV_SAVE_REGS(clobbers);                               \
                  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_irq_enable);     \
-                 PV_RESTORE_REGS;)
+                 PV_RESTORE_REGS(clobbers);)
 
 #define USERGS_SYSRET32                                                        \
        PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret32),       \
@@ -1606,11 +1632,15 @@ static inline unsigned long __raw_local_irq_save(void)
        PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE,     \
                  swapgs)
 
+/*
+ * Note: swapgs is very special, and in practice is either going to be
+ * implemented with a single "swapgs" instruction or something very
+ * special.  Either way, we don't need to save any registers for
+ * it.
+ */
 #define SWAPGS                                                         \
        PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_swapgs), CLBR_NONE,     \
-                 PV_SAVE_REGS;                                         \
-                 call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs);         \
-                 PV_RESTORE_REGS                                       \
+                 call PARA_INDIRECT(pv_cpu_ops+PV_CPU_swapgs)          \
                 )
 
 #define GET_CR2_INTO_RCX                               \