KVM guest: KVM Steal time registration
Glauber Costa [Mon, 11 Jul 2011 19:28:19 +0000 (15:28 -0400)]
This patch implements the kvm bits of the steal time infrastructure.
The most important part of it, is the steal time clock. It is a
continuous clock that shows the accumulated amount of steal time
since vcpu creation. It is supposed to survive cpu offlining/onlining.

[marcelo: fix build with CONFIG_KVM_GUEST=n]

Signed-off-by: Glauber Costa <glommer@redhat.com>
Acked-by: Rik van Riel <riel@redhat.com>
Tested-by: Eric B Munson <emunson@mgebm.net>
CC: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
CC: Peter Zijlstra <peterz@infradead.org>
CC: Avi Kivity <avi@redhat.com>
CC: Anthony Liguori <aliguori@us.ibm.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>

Documentation/kernel-parameters.txt
arch/x86/include/asm/kvm_para.h
arch/x86/kernel/kvm.c
arch/x86/kernel/kvmclock.c

index fd248a3..a722574 100644 (file)
@@ -1737,6 +1737,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
        no-kvmapf       [X86,KVM] Disable paravirtualized asynchronous page
                        fault handling.
 
+       no-steal-acc    [X86,KVM] Disable paravirtualized steal time accounting.
+                       Steal time is computed, but won't influence scheduler
+                       behaviour.
+
        nolapic         [X86-32,APIC] Do not enable or use the local APIC.
 
        nolapic_timer   [X86-32,APIC] Do not use the local APIC timer.
index c484ba8..734c376 100644 (file)
@@ -192,6 +192,7 @@ void __init kvm_guest_init(void);
 void kvm_async_pf_task_wait(u32 token);
 void kvm_async_pf_task_wake(u32 token);
 u32 kvm_read_and_reset_pf_reason(void);
+extern void kvm_disable_steal_time(void);
 #else
 #define kvm_guest_init() do { } while (0)
 #define kvm_async_pf_task_wait(T) do {} while(0)
@@ -200,6 +201,11 @@ static inline u32 kvm_read_and_reset_pf_reason(void)
 {
        return 0;
 }
+
+static inline void kvm_disable_steal_time(void)
+{
+       return;
+}
 #endif
 
 #endif /* __KERNEL__ */
index 33c07b0..a9c2116 100644 (file)
@@ -51,6 +51,15 @@ static int parse_no_kvmapf(char *arg)
 
 early_param("no-kvmapf", parse_no_kvmapf);
 
+static int steal_acc = 1;
+static int parse_no_stealacc(char *arg)
+{
+        steal_acc = 0;
+        return 0;
+}
+
+early_param("no-steal-acc", parse_no_stealacc);
+
 struct kvm_para_state {
        u8 mmu_queue[MMU_QUEUE_SIZE];
        int mmu_queue_len;
@@ -58,6 +67,8 @@ struct kvm_para_state {
 
 static DEFINE_PER_CPU(struct kvm_para_state, para_state);
 static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64);
+static DEFINE_PER_CPU(struct kvm_steal_time, steal_time) __aligned(64);
+static int has_steal_clock = 0;
 
 static struct kvm_para_state *kvm_para_state(void)
 {
@@ -441,6 +452,21 @@ static void __init paravirt_ops_setup(void)
 #endif
 }
 
+static void kvm_register_steal_time(void)
+{
+       int cpu = smp_processor_id();
+       struct kvm_steal_time *st = &per_cpu(steal_time, cpu);
+
+       if (!has_steal_clock)
+               return;
+
+       memset(st, 0, sizeof(*st));
+
+       wrmsrl(MSR_KVM_STEAL_TIME, (__pa(st) | KVM_MSR_ENABLED));
+       printk(KERN_INFO "kvm-stealtime: cpu %d, msr %lx\n",
+               cpu, __pa(st));
+}
+
 void __cpuinit kvm_guest_cpu_init(void)
 {
        if (!kvm_para_available())
@@ -457,6 +483,9 @@ void __cpuinit kvm_guest_cpu_init(void)
                printk(KERN_INFO"KVM setup async PF for cpu %d\n",
                       smp_processor_id());
        }
+
+       if (has_steal_clock)
+               kvm_register_steal_time();
 }
 
 static void kvm_pv_disable_apf(void *unused)
@@ -483,6 +512,31 @@ static struct notifier_block kvm_pv_reboot_nb = {
        .notifier_call = kvm_pv_reboot_notify,
 };
 
+static u64 kvm_steal_clock(int cpu)
+{
+       u64 steal;
+       struct kvm_steal_time *src;
+       int version;
+
+       src = &per_cpu(steal_time, cpu);
+       do {
+               version = src->version;
+               rmb();
+               steal = src->steal;
+               rmb();
+       } while ((version & 1) || (version != src->version));
+
+       return steal;
+}
+
+void kvm_disable_steal_time(void)
+{
+       if (!has_steal_clock)
+               return;
+
+       wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
+}
+
 #ifdef CONFIG_SMP
 static void __init kvm_smp_prepare_boot_cpu(void)
 {
@@ -500,6 +554,7 @@ static void __cpuinit kvm_guest_cpu_online(void *dummy)
 
 static void kvm_guest_cpu_offline(void *dummy)
 {
+       kvm_disable_steal_time();
        kvm_pv_disable_apf(NULL);
        apf_task_wake_all();
 }
@@ -548,6 +603,11 @@ void __init kvm_guest_init(void)
        if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF))
                x86_init.irqs.trap_init = kvm_apf_trap_init;
 
+       if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
+               has_steal_clock = 1;
+               pv_time_ops.steal_clock = kvm_steal_clock;
+       }
+
 #ifdef CONFIG_SMP
        smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
        register_cpu_notifier(&kvm_cpu_notifier);
@@ -555,3 +615,15 @@ void __init kvm_guest_init(void)
        kvm_guest_cpu_init();
 #endif
 }
+
+static __init int activate_jump_labels(void)
+{
+       if (has_steal_clock) {
+               jump_label_inc(&paravirt_steal_enabled);
+               if (steal_acc)
+                       jump_label_inc(&paravirt_steal_rq_enabled);
+       }
+
+       return 0;
+}
+arch_initcall(activate_jump_labels);
index 6389a6b..c1a0188 100644 (file)
@@ -160,6 +160,7 @@ static void __cpuinit kvm_setup_secondary_clock(void)
 static void kvm_crash_shutdown(struct pt_regs *regs)
 {
        native_write_msr(msr_kvm_system_time, 0, 0);
+       kvm_disable_steal_time();
        native_machine_crash_shutdown(regs);
 }
 #endif
@@ -167,6 +168,7 @@ static void kvm_crash_shutdown(struct pt_regs *regs)
 static void kvm_shutdown(void)
 {
        native_write_msr(msr_kvm_system_time, 0, 0);
+       kvm_disable_steal_time();
        native_machine_shutdown();
 }