blob: c9df991150bb158efff00e2040fca20025a20095 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * linux/arch/x86-64/kernel/process.c
3 *
4 * Copyright (C) 1995 Linus Torvalds
5 *
6 * Pentium III FXSR, SSE support
7 * Gareth Hughes <gareth@valinux.com>, May 2000
8 *
9 * X86-64 port
10 * Andi Kleen.
Ashok Raj76e4f662005-06-25 14:55:00 -070011 *
12 * CPU hotplug support - ashok.raj@intel.com
Linus Torvalds1da177e2005-04-16 15:20:36 -070013 * $Id: process.c,v 1.38 2002/01/15 10:08:03 ak Exp $
14 */
15
16/*
17 * This file handles the architecture-dependent parts of process handling..
18 */
19
20#include <stdarg.h>
21
Ashok Raj76e4f662005-06-25 14:55:00 -070022#include <linux/cpu.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070023#include <linux/errno.h>
24#include <linux/sched.h>
25#include <linux/kernel.h>
26#include <linux/mm.h>
27#include <linux/elfcore.h>
28#include <linux/smp.h>
29#include <linux/slab.h>
30#include <linux/user.h>
31#include <linux/module.h>
32#include <linux/a.out.h>
33#include <linux/interrupt.h>
34#include <linux/delay.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070035#include <linux/ptrace.h>
36#include <linux/utsname.h>
37#include <linux/random.h>
Rusty Lynch73649da2005-06-23 00:09:23 -070038#include <linux/kprobes.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070039
40#include <asm/uaccess.h>
41#include <asm/pgtable.h>
42#include <asm/system.h>
43#include <asm/io.h>
44#include <asm/processor.h>
45#include <asm/i387.h>
46#include <asm/mmu_context.h>
47#include <asm/pda.h>
48#include <asm/prctl.h>
49#include <asm/kdebug.h>
50#include <asm/desc.h>
51#include <asm/proto.h>
52#include <asm/ia32.h>
53
54asmlinkage extern void ret_from_fork(void);
55
56unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
57
Linus Torvalds1da177e2005-04-16 15:20:36 -070058unsigned long boot_option_idle_override = 0;
59EXPORT_SYMBOL(boot_option_idle_override);
60
61/*
62 * Powermanagement idle function, if any..
63 */
64void (*pm_idle)(void);
65static DEFINE_PER_CPU(unsigned int, cpu_idle_state);
66
Linus Torvalds1da177e2005-04-16 15:20:36 -070067/*
68 * We use this if we don't have any better
69 * idle routine..
70 */
71void default_idle(void)
72{
Nick Piggin64c7c8f2005-11-08 21:39:04 -080073 local_irq_enable();
74
Andi Kleen2d52ede2006-01-11 22:42:42 +010075 clear_thread_flag(TIF_POLLING_NRFLAG);
76 smp_mb__after_clear_bit();
77 while (!need_resched()) {
78 local_irq_disable();
79 if (!need_resched())
80 safe_halt();
81 else
82 local_irq_enable();
Linus Torvalds1da177e2005-04-16 15:20:36 -070083 }
Andi Kleen2d52ede2006-01-11 22:42:42 +010084 set_thread_flag(TIF_POLLING_NRFLAG);
Linus Torvalds1da177e2005-04-16 15:20:36 -070085}
86
87/*
88 * On SMP it's slightly faster (but much more power-consuming!)
89 * to poll the ->need_resched flag instead of waiting for the
90 * cross-CPU IPI to arrive. Use this option with caution.
91 */
92static void poll_idle (void)
93{
Linus Torvalds1da177e2005-04-16 15:20:36 -070094 local_irq_enable();
95
Nick Piggin64c7c8f2005-11-08 21:39:04 -080096 asm volatile(
97 "2:"
98 "testl %0,%1;"
99 "rep; nop;"
100 "je 2b;"
101 : :
102 "i" (_TIF_NEED_RESCHED),
103 "m" (current_thread_info()->flags));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700104}
105
106void cpu_idle_wait(void)
107{
108 unsigned int cpu, this_cpu = get_cpu();
109 cpumask_t map;
110
111 set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
112 put_cpu();
113
114 cpus_clear(map);
115 for_each_online_cpu(cpu) {
116 per_cpu(cpu_idle_state, cpu) = 1;
117 cpu_set(cpu, map);
118 }
119
120 __get_cpu_var(cpu_idle_state) = 0;
121
122 wmb();
123 do {
124 ssleep(1);
125 for_each_online_cpu(cpu) {
Andi Kleena88cde12005-11-05 17:25:54 +0100126 if (cpu_isset(cpu, map) &&
127 !per_cpu(cpu_idle_state, cpu))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700128 cpu_clear(cpu, map);
129 }
130 cpus_and(map, map, cpu_online_map);
131 } while (!cpus_empty(map));
132}
133EXPORT_SYMBOL_GPL(cpu_idle_wait);
134
#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>

/*
 * Park this CPU for physical CPU hotplug.  cpu_idle() calls this once
 * the CPU it runs on is reported offline.  The function never returns:
 * after acknowledging via cpu_state it halts forever with interrupts
 * disabled.
 */
static inline void play_dead(void)
{
	idle_task_exit();
	wbinvd();
	mb();

	/* Acknowledge: publish CPU_DEAD in this CPU's cpu_state. */
	__get_cpu_var(cpu_state) = CPU_DEAD;

	local_irq_disable();
	for (;;)
		halt();
}
#else
/* Without CONFIG_HOTPLUG_CPU reaching this function is treated as a bug. */
static inline void play_dead(void)
{
	BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */
158
Linus Torvalds1da177e2005-04-16 15:20:36 -0700159/*
160 * The idle thread. There's no useful work to be
161 * done, so just try to conserve power and have a
162 * low exit latency (ie sit in a loop waiting for
163 * somebody to say that they'd like to reschedule)
164 */
165void cpu_idle (void)
166{
Nick Piggin64c7c8f2005-11-08 21:39:04 -0800167 set_thread_flag(TIF_POLLING_NRFLAG);
168
Linus Torvalds1da177e2005-04-16 15:20:36 -0700169 /* endless idle loop with no priority at all */
170 while (1) {
171 while (!need_resched()) {
172 void (*idle)(void);
173
174 if (__get_cpu_var(cpu_idle_state))
175 __get_cpu_var(cpu_idle_state) = 0;
176
177 rmb();
178 idle = pm_idle;
179 if (!idle)
180 idle = default_idle;
Ashok Raj76e4f662005-06-25 14:55:00 -0700181 if (cpu_is_offline(smp_processor_id()))
182 play_dead();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700183 idle();
184 }
185
Nick Piggin5bfb5d62005-11-08 21:39:01 -0800186 preempt_enable_no_resched();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700187 schedule();
Nick Piggin5bfb5d62005-11-08 21:39:01 -0800188 preempt_disable();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700189 }
190}
191
192/*
193 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
194 * which can obviate IPI to trigger checking of need_resched.
195 * We execute MONITOR against need_resched and enter optimized wait state
196 * through MWAIT. Whenever someone changes need_resched, we would be woken
197 * up from MWAIT (without an IPI).
198 */
199static void mwait_idle(void)
200{
201 local_irq_enable();
202
Nick Piggin64c7c8f2005-11-08 21:39:04 -0800203 while (!need_resched()) {
204 __monitor((void *)&current_thread_info()->flags, 0, 0);
205 smp_mb();
206 if (need_resched())
207 break;
208 __mwait(0, 0);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700209 }
210}
211
Ashok Raje6982c62005-06-25 14:54:58 -0700212void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700213{
214 static int printed;
215 if (cpu_has(c, X86_FEATURE_MWAIT)) {
216 /*
217 * Skip, if setup has overridden idle.
218 * One CPU supports mwait => All CPUs supports mwait
219 */
220 if (!pm_idle) {
221 if (!printed) {
222 printk("using mwait in idle threads.\n");
223 printed = 1;
224 }
225 pm_idle = mwait_idle;
226 }
227 }
228}
229
230static int __init idle_setup (char *str)
231{
232 if (!strncmp(str, "poll", 4)) {
233 printk("using polling idle threads.\n");
234 pm_idle = poll_idle;
235 }
236
237 boot_option_idle_override = 1;
238 return 1;
239}
240
241__setup("idle=", idle_setup);
242
243/* Prints also some state that isn't saved in the pt_regs */
244void __show_regs(struct pt_regs * regs)
245{
246 unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
247 unsigned int fsindex,gsindex;
248 unsigned int ds,cs,es;
249
250 printk("\n");
251 print_modules();
Andi Kleen9acf23c2005-09-12 18:49:24 +0200252 printk("Pid: %d, comm: %.20s %s %s %.*s\n",
253 current->pid, current->comm, print_tainted(),
254 system_utsname.release,
255 (int)strcspn(system_utsname.version, " "),
256 system_utsname.version);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700257 printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
258 printk_address(regs->rip);
Andi Kleena88cde12005-11-05 17:25:54 +0100259 printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp,
260 regs->eflags);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700261 printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
262 regs->rax, regs->rbx, regs->rcx);
263 printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
264 regs->rdx, regs->rsi, regs->rdi);
265 printk("RBP: %016lx R08: %016lx R09: %016lx\n",
266 regs->rbp, regs->r8, regs->r9);
267 printk("R10: %016lx R11: %016lx R12: %016lx\n",
268 regs->r10, regs->r11, regs->r12);
269 printk("R13: %016lx R14: %016lx R15: %016lx\n",
270 regs->r13, regs->r14, regs->r15);
271
272 asm("movl %%ds,%0" : "=r" (ds));
273 asm("movl %%cs,%0" : "=r" (cs));
274 asm("movl %%es,%0" : "=r" (es));
275 asm("movl %%fs,%0" : "=r" (fsindex));
276 asm("movl %%gs,%0" : "=r" (gsindex));
277
278 rdmsrl(MSR_FS_BASE, fs);
279 rdmsrl(MSR_GS_BASE, gs);
280 rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
281
282 asm("movq %%cr0, %0": "=r" (cr0));
283 asm("movq %%cr2, %0": "=r" (cr2));
284 asm("movq %%cr3, %0": "=r" (cr3));
285 asm("movq %%cr4, %0": "=r" (cr4));
286
287 printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
288 fs,fsindex,gs,gsindex,shadowgs);
289 printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
290 printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
291}
292
293void show_regs(struct pt_regs *regs)
294{
Zwane Mwaikamboc078d322005-09-06 15:16:16 -0700295 printk("CPU %d:", smp_processor_id());
Linus Torvalds1da177e2005-04-16 15:20:36 -0700296 __show_regs(regs);
297 show_trace(&regs->rsp);
298}
299
300/*
301 * Free current thread data structures etc..
302 */
303void exit_thread(void)
304{
305 struct task_struct *me = current;
306 struct thread_struct *t = &me->thread;
Rusty Lynch73649da2005-06-23 00:09:23 -0700307
308 /*
309 * Remove function-return probe instances associated with this task
310 * and put them back on the free list. Do not insert an exit probe for
311 * this function, it will be disabled by kprobe_flush_task if you do.
312 */
313 kprobe_flush_task(me);
314
Linus Torvalds1da177e2005-04-16 15:20:36 -0700315 if (me->thread.io_bitmap_ptr) {
316 struct tss_struct *tss = &per_cpu(init_tss, get_cpu());
317
318 kfree(t->io_bitmap_ptr);
319 t->io_bitmap_ptr = NULL;
320 /*
321 * Careful, clear this in the TSS too:
322 */
323 memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
324 t->io_bitmap_max = 0;
325 put_cpu();
326 }
327}
328
329void flush_thread(void)
330{
331 struct task_struct *tsk = current;
332 struct thread_info *t = current_thread_info();
333
334 if (t->flags & _TIF_ABI_PENDING)
335 t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);
336
337 tsk->thread.debugreg0 = 0;
338 tsk->thread.debugreg1 = 0;
339 tsk->thread.debugreg2 = 0;
340 tsk->thread.debugreg3 = 0;
341 tsk->thread.debugreg6 = 0;
342 tsk->thread.debugreg7 = 0;
343 memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
344 /*
345 * Forget coprocessor state..
346 */
347 clear_fpu(tsk);
348 clear_used_math();
349}
350
351void release_thread(struct task_struct *dead_task)
352{
353 if (dead_task->mm) {
354 if (dead_task->mm->context.size) {
355 printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
356 dead_task->comm,
357 dead_task->mm->context.ldt,
358 dead_task->mm->context.size);
359 BUG();
360 }
361 }
362}
363
364static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
365{
366 struct user_desc ud = {
367 .base_addr = addr,
368 .limit = 0xfffff,
369 .seg_32bit = 1,
370 .limit_in_pages = 1,
371 .useable = 1,
372 };
373 struct n_desc_struct *desc = (void *)t->thread.tls_array;
374 desc += tls;
375 desc->a = LDT_entry_a(&ud);
376 desc->b = LDT_entry_b(&ud);
377}
378
379static inline u32 read_32bit_tls(struct task_struct *t, int tls)
380{
381 struct desc_struct *desc = (void *)t->thread.tls_array;
382 desc += tls;
383 return desc->base0 |
384 (((u32)desc->base1) << 16) |
385 (((u32)desc->base2) << 24);
386}
387
/*
 * Called before a new thread is allocated and the current task is
 * copied into it.  The only work done here is unlazy_fpu() on @tsk.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}
396
397int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
398 unsigned long unused,
399 struct task_struct * p, struct pt_regs * regs)
400{
401 int err;
402 struct pt_regs * childregs;
403 struct task_struct *me = current;
404
Andi Kleena88cde12005-11-05 17:25:54 +0100405 childregs = ((struct pt_regs *)
406 (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700407 *childregs = *regs;
408
409 childregs->rax = 0;
410 childregs->rsp = rsp;
Andi Kleena88cde12005-11-05 17:25:54 +0100411 if (rsp == ~0UL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700412 childregs->rsp = (unsigned long)childregs;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700413
414 p->thread.rsp = (unsigned long) childregs;
415 p->thread.rsp0 = (unsigned long) (childregs+1);
416 p->thread.userrsp = me->thread.userrsp;
417
418 set_ti_thread_flag(p->thread_info, TIF_FORK);
419
420 p->thread.fs = me->thread.fs;
421 p->thread.gs = me->thread.gs;
422
H. J. Lufd51f662005-05-01 08:58:48 -0700423 asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
424 asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
425 asm("mov %%es,%0" : "=m" (p->thread.es));
426 asm("mov %%ds,%0" : "=m" (p->thread.ds));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700427
428 if (unlikely(me->thread.io_bitmap_ptr != NULL)) {
429 p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
430 if (!p->thread.io_bitmap_ptr) {
431 p->thread.io_bitmap_max = 0;
432 return -ENOMEM;
433 }
Andi Kleena88cde12005-11-05 17:25:54 +0100434 memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
435 IO_BITMAP_BYTES);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700436 }
437
438 /*
439 * Set a new TLS for the child thread?
440 */
441 if (clone_flags & CLONE_SETTLS) {
442#ifdef CONFIG_IA32_EMULATION
443 if (test_thread_flag(TIF_IA32))
444 err = ia32_child_tls(p, childregs);
445 else
446#endif
447 err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
448 if (err)
449 goto out;
450 }
451 err = 0;
452out:
453 if (err && p->thread.io_bitmap_ptr) {
454 kfree(p->thread.io_bitmap_ptr);
455 p->thread.io_bitmap_max = 0;
456 }
457 return err;
458}
459
/*
 * Load debug register number @r from the debugreg<r> member of the
 * structure @thread points to.  @thread is parenthesised so that any
 * pointer expression (for example "base + 1") can be passed safely.
 */
#define loaddebug(thread, r) set_debug((thread)->debugreg ## r, r)
464
465/*
466 * switch_to(x,y) should switch tasks from x to y.
467 *
468 * This could still be optimized:
469 * - fold all the options into a flag word and test it with a single test.
470 * - could test fs/gs bitsliced
471 */
Andi Kleena88cde12005-11-05 17:25:54 +0100472struct task_struct *
473__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700474{
475 struct thread_struct *prev = &prev_p->thread,
476 *next = &next_p->thread;
477 int cpu = smp_processor_id();
478 struct tss_struct *tss = &per_cpu(init_tss, cpu);
479
480 unlazy_fpu(prev_p);
481
482 /*
483 * Reload esp0, LDT and the page table pointer:
484 */
485 tss->rsp0 = next->rsp0;
486
487 /*
488 * Switch DS and ES.
489 * This won't pick up thread selector changes, but I guess that is ok.
490 */
H. J. Lufd51f662005-05-01 08:58:48 -0700491 asm volatile("mov %%es,%0" : "=m" (prev->es));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700492 if (unlikely(next->es | prev->es))
493 loadsegment(es, next->es);
494
H. J. Lufd51f662005-05-01 08:58:48 -0700495 asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700496 if (unlikely(next->ds | prev->ds))
497 loadsegment(ds, next->ds);
498
499 load_TLS(next, cpu);
500
501 /*
502 * Switch FS and GS.
503 */
504 {
505 unsigned fsindex;
506 asm volatile("movl %%fs,%0" : "=r" (fsindex));
507 /* segment register != 0 always requires a reload.
508 also reload when it has changed.
509 when prev process used 64bit base always reload
510 to avoid an information leak. */
511 if (unlikely(fsindex | next->fsindex | prev->fs)) {
512 loadsegment(fs, next->fsindex);
513 /* check if the user used a selector != 0
514 * if yes clear 64bit base, since overloaded base
515 * is always mapped to the Null selector
516 */
517 if (fsindex)
518 prev->fs = 0;
519 }
520 /* when next process has a 64bit base use it */
521 if (next->fs)
522 wrmsrl(MSR_FS_BASE, next->fs);
523 prev->fsindex = fsindex;
524 }
525 {
526 unsigned gsindex;
527 asm volatile("movl %%gs,%0" : "=r" (gsindex));
528 if (unlikely(gsindex | next->gsindex | prev->gs)) {
529 load_gs_index(next->gsindex);
530 if (gsindex)
531 prev->gs = 0;
532 }
533 if (next->gs)
534 wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
535 prev->gsindex = gsindex;
536 }
537
538 /*
539 * Switch the PDA context.
540 */
541 prev->userrsp = read_pda(oldrsp);
542 write_pda(oldrsp, next->userrsp);
543 write_pda(pcurrent, next_p);
Andi Kleena88cde12005-11-05 17:25:54 +0100544 write_pda(kernelstack,
545 (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700546
547 /*
548 * Now maybe reload the debug registers
549 */
550 if (unlikely(next->debugreg7)) {
551 loaddebug(next, 0);
552 loaddebug(next, 1);
553 loaddebug(next, 2);
554 loaddebug(next, 3);
555 /* no 4 and 5 */
556 loaddebug(next, 6);
557 loaddebug(next, 7);
558 }
559
560
561 /*
562 * Handle the IO bitmap
563 */
564 if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
565 if (next->io_bitmap_ptr)
566 /*
567 * Copy the relevant range of the IO bitmap.
568 * Normally this is 128 bytes or less:
569 */
570 memcpy(tss->io_bitmap, next->io_bitmap_ptr,
571 max(prev->io_bitmap_max, next->io_bitmap_max));
572 else {
573 /*
574 * Clear any possible leftover bits:
575 */
576 memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
577 }
578 }
579
580 return prev_p;
581}
582
583/*
584 * sys_execve() executes a new program.
585 */
586asmlinkage
587long sys_execve(char __user *name, char __user * __user *argv,
588 char __user * __user *envp, struct pt_regs regs)
589{
590 long error;
591 char * filename;
592
593 filename = getname(name);
594 error = PTR_ERR(filename);
595 if (IS_ERR(filename))
596 return error;
597 error = do_execve(filename, argv, envp, &regs);
598 if (error == 0) {
599 task_lock(current);
600 current->ptrace &= ~PT_DTRACE;
601 task_unlock(current);
602 }
603 putname(filename);
604 return error;
605}
606
607void set_personality_64bit(void)
608{
609 /* inherit personality from parent */
610
611 /* Make sure to be in 64bit mode */
612 clear_thread_flag(TIF_IA32);
613
614 /* TBD: overwrites user setup. Should have two bits.
615 But 64bit processes have always behaved this way,
616 so it's not too bad. The main problem is just that
617 32bit childs are affected again. */
618 current->personality &= ~READ_IMPLIES_EXEC;
619}
620
621asmlinkage long sys_fork(struct pt_regs *regs)
622{
623 return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
624}
625
Andi Kleena88cde12005-11-05 17:25:54 +0100626asmlinkage long
627sys_clone(unsigned long clone_flags, unsigned long newsp,
628 void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700629{
630 if (!newsp)
631 newsp = regs->rsp;
632 return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
633}
634
635/*
636 * This is trivial, and on the face of it looks like it
637 * could equally well be done in user mode.
638 *
639 * Not so, for quite unobvious reasons - register pressure.
640 * In user mode vfork() cannot have a stack frame, and if
641 * done by calling the "clone()" system call directly, you
642 * do not have enough call-clobbered registers to hold all
643 * the information you need.
644 */
645asmlinkage long sys_vfork(struct pt_regs *regs)
646{
647 return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->rsp, regs, 0,
648 NULL, NULL);
649}
650
651unsigned long get_wchan(struct task_struct *p)
652{
653 unsigned long stack;
654 u64 fp,rip;
655 int count = 0;
656
657 if (!p || p == current || p->state==TASK_RUNNING)
658 return 0;
659 stack = (unsigned long)p->thread_info;
660 if (p->thread.rsp < stack || p->thread.rsp > stack+THREAD_SIZE)
661 return 0;
662 fp = *(u64 *)(p->thread.rsp);
663 do {
Andi Kleena88cde12005-11-05 17:25:54 +0100664 if (fp < (unsigned long)stack ||
665 fp > (unsigned long)stack+THREAD_SIZE)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700666 return 0;
667 rip = *(u64 *)(fp+8);
668 if (!in_sched_functions(rip))
669 return rip;
670 fp = *(u64 *)fp;
671 } while (count++ < 16);
672 return 0;
673}
674
675long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
676{
677 int ret = 0;
678 int doit = task == current;
679 int cpu;
680
681 switch (code) {
682 case ARCH_SET_GS:
Suresh Siddha84929802005-06-21 17:14:32 -0700683 if (addr >= TASK_SIZE_OF(task))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700684 return -EPERM;
685 cpu = get_cpu();
686 /* handle small bases via the GDT because that's faster to
687 switch. */
688 if (addr <= 0xffffffff) {
689 set_32bit_tls(task, GS_TLS, addr);
690 if (doit) {
691 load_TLS(&task->thread, cpu);
692 load_gs_index(GS_TLS_SEL);
693 }
694 task->thread.gsindex = GS_TLS_SEL;
695 task->thread.gs = 0;
696 } else {
697 task->thread.gsindex = 0;
698 task->thread.gs = addr;
699 if (doit) {
Andi Kleena88cde12005-11-05 17:25:54 +0100700 load_gs_index(0);
701 ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700702 }
703 }
704 put_cpu();
705 break;
706 case ARCH_SET_FS:
707 /* Not strictly needed for fs, but do it for symmetry
708 with gs */
Suresh Siddha84929802005-06-21 17:14:32 -0700709 if (addr >= TASK_SIZE_OF(task))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700710 return -EPERM;
711 cpu = get_cpu();
712 /* handle small bases via the GDT because that's faster to
713 switch. */
714 if (addr <= 0xffffffff) {
715 set_32bit_tls(task, FS_TLS, addr);
716 if (doit) {
717 load_TLS(&task->thread, cpu);
Andi Kleena88cde12005-11-05 17:25:54 +0100718 asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700719 }
720 task->thread.fsindex = FS_TLS_SEL;
721 task->thread.fs = 0;
722 } else {
723 task->thread.fsindex = 0;
724 task->thread.fs = addr;
725 if (doit) {
726 /* set the selector to 0 to not confuse
727 __switch_to */
Andi Kleena88cde12005-11-05 17:25:54 +0100728 asm volatile("movl %0,%%fs" :: "r" (0));
729 ret = checking_wrmsrl(MSR_FS_BASE, addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700730 }
731 }
732 put_cpu();
733 break;
734 case ARCH_GET_FS: {
735 unsigned long base;
736 if (task->thread.fsindex == FS_TLS_SEL)
737 base = read_32bit_tls(task, FS_TLS);
Andi Kleena88cde12005-11-05 17:25:54 +0100738 else if (doit)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700739 rdmsrl(MSR_FS_BASE, base);
Andi Kleena88cde12005-11-05 17:25:54 +0100740 else
Linus Torvalds1da177e2005-04-16 15:20:36 -0700741 base = task->thread.fs;
742 ret = put_user(base, (unsigned long __user *)addr);
743 break;
744 }
745 case ARCH_GET_GS: {
746 unsigned long base;
747 if (task->thread.gsindex == GS_TLS_SEL)
748 base = read_32bit_tls(task, GS_TLS);
Andi Kleena88cde12005-11-05 17:25:54 +0100749 else if (doit)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700750 rdmsrl(MSR_KERNEL_GS_BASE, base);
Andi Kleena88cde12005-11-05 17:25:54 +0100751 else
Linus Torvalds1da177e2005-04-16 15:20:36 -0700752 base = task->thread.gs;
753 ret = put_user(base, (unsigned long __user *)addr);
754 break;
755 }
756
757 default:
758 ret = -EINVAL;
759 break;
760 }
761
762 return ret;
763}
764
765long sys_arch_prctl(int code, unsigned long addr)
766{
767 return do_arch_prctl(current, code, addr);
768}
769
770/*
771 * Capture the user space registers if the task is not running (in user space)
772 */
773int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
774{
775 struct pt_regs *pp, ptregs;
776
777 pp = (struct pt_regs *)(tsk->thread.rsp0);
778 --pp;
779
780 ptregs = *pp;
781 ptregs.cs &= 0xffff;
782 ptregs.ss &= 0xffff;
783
784 elf_core_copy_regs(regs, &ptregs);
785
786 return 1;
787}
788
789unsigned long arch_align_stack(unsigned long sp)
790{
791 if (randomize_va_space)
792 sp -= get_random_int() % 8192;
793 return sp & ~0xf;
794}