xen: release unused free memory
linux-3.10.git: arch/x86/xen/setup.c
/*
 * Machine specific setup for xen
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */

#include <linux/module.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/pm.h>

#include <asm/elf.h>
#include <asm/vdso.h>
#include <asm/e820.h>
#include <asm/setup.h>
#include <asm/acpi.h>
#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/page.h>
#include <xen/interface/callback.h>
#include <xen/interface/physdev.h>
#include <xen/interface/memory.h>
#include <xen/features.h>

#include "xen-ops.h"
#include "vdso.h"

/* These are code, but not functions.  Defined in entry.S */
extern const char xen_hypervisor_callback[];
extern const char xen_failsafe_callback[];
extern void xen_sysenter_target(void);
extern void xen_syscall_target(void);
extern void xen_syscall32_target(void);

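/*
 * Hand the page frames backing [start_addr, end_addr) back to the
 * hypervisor with XENMEM_decrease_reservation, then invalidate the
 * corresponding p2m entries.  Returns the number of pages released.
 */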
static unsigned long __init xen_release_chunk(phys_addr_t start_addr, phys_addr_t end_addr)
{
        struct xen_memory_reservation reservation = {
                .address_bits = 0,
                .extent_order = 0,
                .domid        = DOMID_SELF
        };
        unsigned long *mfn_list = (unsigned long *)xen_start_info->mfn_list;
        unsigned long start, end;
        unsigned long len;
        unsigned long pfn;
        int ret;

        start = PFN_UP(start_addr);
        end = PFN_UP(end_addr);

        if (end <= start)
                return 0;

        len = end - start;

        set_xen_guest_handle(reservation.extent_start, &mfn_list[start]);
        reservation.nr_extents = len;

        ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
        WARN(ret != len, "Failed to release memory %lx-%lx err=%d\n",
             start, end, ret);

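        /* Scrub the released frames from the phys-to-machine map. */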
        for (pfn = start; pfn < end; pfn++)
                set_phys_to_machine(pfn, INVALID_P2M_ENTRY);

        return len;
}

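/*
 * Release the memory lying in the gaps between e820 entries, plus
 * anything between the end of the last entry and nr_pages.  Returns
 * the total number of pages released.
 */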
static unsigned long __init xen_return_unused_memory(const struct e820map *e820)
{
        unsigned long last_end = 0;
        unsigned long released = 0;
        int i;

        for (i = 0; i < e820->nr_map; i++) {
                released += xen_release_chunk(last_end, e820->map[i].addr);
                last_end = e820->map[i].addr + e820->map[i].size;
        }

        released += xen_release_chunk(last_end, PFN_PHYS(xen_start_info->nr_pages));

        printk(KERN_INFO "released %ld pages of unused memory\n", released);
        return released;
}

/**
 * xen_memory_setup - Hook for machine specific memory setup.
 */
char * __init xen_memory_setup(void)
{
        unsigned long max_pfn = xen_start_info->nr_pages;

        max_pfn = min(MAX_DOMAIN_PAGES, max_pfn);

        e820.nr_map = 0;

        e820_add_region(0, PFN_PHYS((u64)max_pfn), E820_RAM);

        /*
         * Even though this is normal, usable memory under Xen, reserve
         * ISA memory anyway because too many things think they can poke
         * about in there.
         */
        e820_add_region(ISA_START_ADDRESS, ISA_END_ADDRESS - ISA_START_ADDRESS,
                        E820_RESERVED);

        /*
         * Reserve Xen bits:
         *  - mfn_list
         *  - xen_start_info
         * See comment above "struct start_info" in <xen/interface/xen.h>
         */
        reserve_early(__pa(xen_start_info->mfn_list),
                      __pa(xen_start_info->pt_base),
                      "XEN START INFO");

        sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);

        xen_return_unused_memory(&e820);

        return "Xen";
}

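/*
 * Default idle loop: clear TS_POLLING so a remote wakeup must send an
 * interrupt, then block in the hypervisor via safe_halt() until the
 * next event arrives.
 */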
static void xen_idle(void)
{
        local_irq_disable();

        if (need_resched()) {
                local_irq_enable();
        } else {
                current_thread_info()->status &= ~TS_POLLING;
                smp_mb__after_clear_bit();
                safe_halt();
                current_thread_info()->status |= TS_POLLING;
        }
}

/*
 * Set the bit indicating "nosegneg" library variants should be used.
 * We only need to bother in pure 32-bit mode; compat 32-bit processes
 * can have un-truncated segments, so wrapping around is allowed.
 */
static void __init fiddle_vdso(void)
{
#ifdef CONFIG_X86_32
        u32 *mask;
        mask = VDSO32_SYMBOL(&vdso32_int80_start, NOTE_MASK);
        *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
        mask = VDSO32_SYMBOL(&vdso32_sysenter_start, NOTE_MASK);
        *mask |= 1 << VDSO_NOTE_NONEGSEG_BIT;
#endif
}

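/*
 * Register an entry point with the hypervisor for the given callback
 * type.  Events are masked while the callback runs.
 */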
static __cpuinit int register_callback(unsigned type, const void *func)
{
        struct callback_register callback = {
                .type = type,
                .address = XEN_CALLBACK(__KERNEL_CS, func),
                .flags = CALLBACKF_mask_events,
        };

        return HYPERVISOR_callback_op(CALLBACKOP_register, &callback);
}

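/*
 * Register the sysenter entry point if the CPU supports it; on
 * failure, clear the feature bit so the vDSO falls back to int $0x80.
 */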
void __cpuinit xen_enable_sysenter(void)
{
        int ret;
        unsigned sysenter_feature;

#ifdef CONFIG_X86_32
        sysenter_feature = X86_FEATURE_SEP;
#else
        sysenter_feature = X86_FEATURE_SYSENTER32;
#endif

        if (!boot_cpu_has(sysenter_feature))
                return;

        ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target);
        if (ret != 0)
                setup_clear_cpu_cap(sysenter_feature);
}

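/*
 * On 64-bit, register the syscall (and, if available, 32-bit compat
 * syscall) entry points with the hypervisor.
 */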
void __cpuinit xen_enable_syscall(void)
{
#ifdef CONFIG_X86_64
        int ret;

        ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target);
        if (ret != 0) {
                printk(KERN_ERR "Failed to set syscall callback: %d\n", ret);
                /*
                 * Pretty fatal; 64-bit userspace has no other
                 * mechanism for syscalls.
                 */
        }

        if (boot_cpu_has(X86_FEATURE_SYSCALL32)) {
                ret = register_callback(CALLBACKTYPE_syscall32,
                                        xen_syscall32_target);
                if (ret != 0)
                        setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
        }
#endif /* CONFIG_X86_64 */
}

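/*
 * Arch-specific boot-time setup when running as a Xen guest: enable
 * vm_assists, register the event and failsafe callbacks, and take
 * over the idle loop.
 */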
void __init xen_arch_setup(void)
{
        struct physdev_set_iopl set_iopl;
        int rc;

        HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_4gb_segments);
        HYPERVISOR_vm_assist(VMASST_CMD_enable, VMASST_TYPE_writable_pagetables);

        if (!xen_feature(XENFEAT_auto_translated_physmap))
                HYPERVISOR_vm_assist(VMASST_CMD_enable,
                                     VMASST_TYPE_pae_extended_cr3);

        if (register_callback(CALLBACKTYPE_event, xen_hypervisor_callback) ||
            register_callback(CALLBACKTYPE_failsafe, xen_failsafe_callback))
                BUG();

        xen_enable_sysenter();
        xen_enable_syscall();

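        /* Set the I/O privilege level via Xen rather than the iopl insn. */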
        set_iopl.iopl = 1;
        rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl);
        if (rc != 0)
                printk(KERN_INFO "physdev_op failed %d\n", rc);

#ifdef CONFIG_ACPI
        if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
                printk(KERN_INFO "ACPI in unprivileged domain disabled\n");
                disable_acpi();
        }
#endif

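        /* Copy the boot command line from start_info, truncating if needed. */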
        memcpy(boot_command_line, xen_start_info->cmd_line,
               MAX_GUEST_CMDLINE > COMMAND_LINE_SIZE ?
               COMMAND_LINE_SIZE : MAX_GUEST_CMDLINE);

        pm_idle = xen_idle;

        paravirt_disable_iospace();

        fiddle_vdso();
}