/*
 * linux/arch/x86-64/kernel/process.c
 *
 * Copyright (C) 1995 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * Gareth Hughes <gareth@valinux.com>, May 2000
 *
 * X86-64 port
 * Andi Kleen.
 *
 * CPU hotplug support - ashok.raj@intel.com
 * $Id: process.c,v 1.38 2002/01/15 10:08:03 ak Exp $
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/module.h>
#include <linux/a.out.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/ptrace.h>
#include <linux/utsname.h>
#include <linux/random.h>
#include <linux/kprobes.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/kdebug.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>

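/*
 * Assembly entry path (in entry.S) that newly created tasks run
 * through on their first schedule; declared here for copy_thread()
 * and friends.
 */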
asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

static atomic_t hlt_counter = ATOMIC_INIT(0);

unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);

/*
 * Power management idle function, if any.
 */
void (*pm_idle)(void);
static DEFINE_PER_CPU(unsigned int, cpu_idle_state);

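/*
 * disable_hlt()/enable_hlt() let other code temporarily keep the idle
 * loop from halting: default_idle() falls through without executing
 * HLT while hlt_counter is non-zero.
 */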
void disable_hlt(void)
{
	atomic_inc(&hlt_counter);
}

EXPORT_SYMBOL(disable_hlt);

void enable_hlt(void)
{
	atomic_dec(&hlt_counter);
}

EXPORT_SYMBOL(enable_hlt);

/*
 * We use this if we don't have any better
 * idle routine.
 */
void default_idle(void)
{
	if (!atomic_read(&hlt_counter)) {
		local_irq_disable();
		if (!need_resched())
			safe_halt();
		else
			local_irq_enable();
	}
}

/*
 * On SMP it's slightly faster (but much more power-consuming!)
 * to poll the ->need_resched flag instead of waiting for the
 * cross-CPU IPI to arrive. Use this option with caution.
 */
static void poll_idle(void)
{
	int oldval;

	local_irq_enable();

	/*
	 * Deal with another CPU just having chosen a thread to
	 * run here:
	 */
	oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);

	if (!oldval) {
		set_thread_flag(TIF_POLLING_NRFLAG);
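		/*
		 * Spin until TIF_NEED_RESCHED gets set again, executing
		 * PAUSE ("rep; nop") on each iteration to relax the CPU.
		 */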
		asm volatile(
			"2:"
			"testl %0,%1;"
			"rep; nop;"
			"je 2b;"
			: :
			"i" (_TIF_NEED_RESCHED),
			"m" (current_thread_info()->flags));
		clear_thread_flag(TIF_POLLING_NRFLAG);
	} else {
		set_need_resched();
	}
}

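/*
 * cpu_idle_wait() makes sure every online CPU has gone around its idle
 * loop at least once since the call (e.g. so a changed pm_idle pointer
 * is guaranteed to be in use everywhere): each CPU's cpu_idle_state
 * flag is set here and cleared by that CPU in cpu_idle().
 */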
void cpu_idle_wait(void)
{
	unsigned int cpu, this_cpu = get_cpu();
	cpumask_t map;

	set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
	put_cpu();

	cpus_clear(map);
	for_each_online_cpu(cpu) {
		per_cpu(cpu_idle_state, cpu) = 1;
		cpu_set(cpu, map);
	}

	__get_cpu_var(cpu_idle_state) = 0;

	wmb();
	do {
		ssleep(1);
		for_each_online_cpu(cpu) {
			if (cpu_isset(cpu, map) &&
			    !per_cpu(cpu_idle_state, cpu))
				cpu_clear(cpu, map);
		}
		cpus_and(map, map, cpu_online_map);
	} while (!cpus_empty(map));
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);

#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/* We don't actually take the CPU down, just spin without interrupts. */
static inline void play_dead(void)
{
	idle_task_exit();
	wbinvd();
	mb();
	/* Ack it */
	__get_cpu_var(cpu_state) = CPU_DEAD;

	while (1)
		safe_halt();
}
#else
static inline void play_dead(void)
{
	BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
	/* endless idle loop with no priority at all */
	while (1) {
		while (!need_resched()) {
			void (*idle)(void);

			if (__get_cpu_var(cpu_idle_state))
				__get_cpu_var(cpu_idle_state) = 0;

			rmb();
			idle = pm_idle;
			if (!idle)
				idle = default_idle;
			if (cpu_is_offline(smp_processor_id()))
				play_dead();
			idle();
		}

		schedule();
	}
}

/*
 * This uses the new MONITOR/MWAIT instructions on P4 processors with PNI,
 * which can obviate the IPI used to trigger checking of need_resched.
 * We execute MONITOR against need_resched and enter an optimized wait
 * state through MWAIT. Whenever someone changes need_resched, we are
 * woken up from MWAIT (without an IPI).
 */
static void mwait_idle(void)
{
	local_irq_enable();

	if (!need_resched()) {
		set_thread_flag(TIF_POLLING_NRFLAG);
		do {
			__monitor((void *)&current_thread_info()->flags, 0, 0);
			if (need_resched())
				break;
			__mwait(0, 0);
		} while (!need_resched());
		clear_thread_flag(TIF_POLLING_NRFLAG);
	}
}

void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
	static int printed;
	if (cpu_has(c, X86_FEATURE_MWAIT)) {
		/*
		 * Skip, if setup has overridden idle.
		 * One CPU supports mwait => all CPUs support mwait.
		 */
		if (!pm_idle) {
			if (!printed) {
				printk("using mwait in idle threads.\n");
				printed = 1;
			}
			pm_idle = mwait_idle;
		}
	}
}

static int __init idle_setup(char *str)
{
	if (!strncmp(str, "poll", 4)) {
		printk("using polling idle threads.\n");
		pm_idle = poll_idle;
	}

	boot_option_idle_override = 1;
	return 1;
}

__setup("idle=", idle_setup);

/* Also prints some state that isn't saved in the pt_regs. */
void __show_regs(struct pt_regs *regs)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;

	printk("\n");
	print_modules();
	printk("Pid: %d, comm: %.20s %s %s %.*s\n",
		current->pid, current->comm, print_tainted(),
		system_utsname.release,
		(int)strcspn(system_utsname.version, " "),
		system_utsname.version);
	printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
	printk_address(regs->rip);
	printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp,
		regs->eflags);
	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->rax, regs->rbx, regs->rcx);
	printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->rdx, regs->rsi, regs->rdi);
	printk("RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->rbp, regs->r8, regs->r9);
	printk("R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk("R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	asm("movq %%cr0, %0": "=r" (cr0));
	asm("movq %%cr2, %0": "=r" (cr2));
	asm("movq %%cr3, %0": "=r" (cr3));
	asm("movq %%cr4, %0": "=r" (cr4));

	printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs, fsindex, gs, gsindex, shadowgs);
	printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
	printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
}

void show_regs(struct pt_regs *regs)
{
	printk("CPU %d:", smp_processor_id());
	__show_regs(regs);
	show_trace(&regs->rsp);
}

/*
 * Free current thread data structures etc.
 */
void exit_thread(void)
{
	struct task_struct *me = current;
	struct thread_struct *t = &me->thread;

	/*
	 * Remove function-return probe instances associated with this task
	 * and put them back on the free list. Do not insert an exit probe
	 * for this function; it will be disabled by kprobe_flush_task if
	 * you do.
	 */
	kprobe_flush_task(me);

	if (me->thread.io_bitmap_ptr) {
		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

		kfree(t->io_bitmap_ptr);
		t->io_bitmap_ptr = NULL;
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
		t->io_bitmap_max = 0;
		put_cpu();
	}
}

void flush_thread(void)
{
	struct task_struct *tsk = current;
	struct thread_info *t = current_thread_info();

	/*
	 * Remove function-return probe instances associated with this task
	 * and put them back on the free list. Do not insert an exit probe
	 * for this function; it will be disabled by kprobe_flush_task if
	 * you do.
	 */
	kprobe_flush_task(tsk);

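	/*
	 * An exec of the other ABI is pending: toggle TIF_IA32 together
	 * with the pending bit so the flag matches the new binary.
	 */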
	if (t->flags & _TIF_ABI_PENDING)
		t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);

	tsk->thread.debugreg0 = 0;
	tsk->thread.debugreg1 = 0;
	tsk->thread.debugreg2 = 0;
	tsk->thread.debugreg3 = 0;
	tsk->thread.debugreg6 = 0;
	tsk->thread.debugreg7 = 0;
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
	/*
	 * Forget coprocessor state.
	 */
	clear_fpu(tsk);
	clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
		if (dead_task->mm->context.size) {
			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
					dead_task->comm,
					dead_task->mm->context.ldt,
					dead_task->mm->context.size);
			BUG();
		}
	}
}

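/*
 * set_32bit_tls()/read_32bit_tls() install and read back a 32-bit TLS
 * segment descriptor in the task's TLS slots. do_arch_prctl() below
 * uses them to keep small (32-bit) FS/GS bases in the GDT, which is
 * cheaper to switch than the MSR-based 64-bit bases.
 */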
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
	struct user_desc ud = {
		.base_addr = addr,
		.limit = 0xfffff,
		.seg_32bit = 1,
		.limit_in_pages = 1,
		.useable = 1,
	};
	struct n_desc_struct *desc = (void *)t->thread.tls_array;
	desc += tls;
	desc->a = LDT_entry_a(&ud);
	desc->b = LDT_entry_b(&ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
	struct desc_struct *desc = (void *)t->thread.tls_array;
	desc += tls;
	return desc->base0 |
		(((u32)desc->base1) << 16) |
		(((u32)desc->base2) << 24);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

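/*
 * Set up the kernel stack and thread state of a new task: the child
 * gets a copy of the parent's user registers at the top of its kernel
 * stack, with rax forced to 0 so fork()/clone() return 0 in the child.
 */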
int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
		unsigned long unused,
		struct task_struct *p, struct pt_regs *regs)
{
	int err;
	struct pt_regs *childregs;
	struct task_struct *me = current;

	childregs = ((struct pt_regs *)
			(THREAD_SIZE + (unsigned long) p->thread_info)) - 1;
	*childregs = *regs;

	childregs->rax = 0;
	childregs->rsp = rsp;
	if (rsp == ~0UL)
		childregs->rsp = (unsigned long)childregs;

	p->thread.rsp = (unsigned long) childregs;
	p->thread.rsp0 = (unsigned long) (childregs+1);
	p->thread.userrsp = me->thread.userrsp;

	set_ti_thread_flag(p->thread_info, TIF_FORK);

	p->thread.fs = me->thread.fs;
	p->thread.gs = me->thread.gs;

	asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
	asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
	asm("mov %%es,%0" : "=m" (p->thread.es));
	asm("mov %%ds,%0" : "=m" (p->thread.ds));

	if (unlikely(me->thread.io_bitmap_ptr != NULL)) {
		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
			IO_BITMAP_BYTES);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (test_thread_flag(TIF_IA32))
			err = ia32_child_tls(p, childregs);
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
		if (err)
			goto out;
	}
	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
	return err;
}

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread,r) set_debug(thread->debugreg ## r, r)

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 */
struct task_struct *
__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread,
				 *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);

	unlazy_fpu(prev_p);

	/*
	 * Reload esp0, LDT and the page table pointer:
	 */
	tss->rsp0 = next->rsp0;

	/*
	 * Switch DS and ES.
	 * This won't pick up thread selector changes, but I guess that is ok.
	 */
	asm volatile("mov %%es,%0" : "=m" (prev->es));
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

	load_TLS(next, cpu);

	/*
	 * Switch FS and GS.
	 */
	{
		unsigned fsindex;
		asm volatile("movl %%fs,%0" : "=r" (fsindex));
		/*
		 * A segment selector != 0 always requires a reload.
		 * Also reload when it has changed.
		 * When the previous process used a 64-bit base, always
		 * reload to avoid an information leak.
		 */
		if (unlikely(fsindex | next->fsindex | prev->fs)) {
			loadsegment(fs, next->fsindex);
			/*
			 * Check if the user used a selector != 0; if so,
			 * clear the 64-bit base, since an overloaded base
			 * is always mapped to the null selector.
			 */
			if (fsindex)
				prev->fs = 0;
		}
		/* When the next process has a 64-bit base, use it. */
		if (next->fs)
			wrmsrl(MSR_FS_BASE, next->fs);
		prev->fsindex = fsindex;
	}
	{
		unsigned gsindex;
		asm volatile("movl %%gs,%0" : "=r" (gsindex));
		if (unlikely(gsindex | next->gsindex | prev->gs)) {
			load_gs_index(next->gsindex);
			if (gsindex)
				prev->gs = 0;
		}
		if (next->gs)
			wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
		prev->gsindex = gsindex;
	}

	/*
	 * Switch the PDA context.
	 */
	prev->userrsp = read_pda(oldrsp);
	write_pda(oldrsp, next->userrsp);
	write_pda(pcurrent, next_p);
	write_pda(kernelstack,
		  (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET);

	/*
	 * Now maybe reload the debug registers
	 */
	if (unlikely(next->debugreg7)) {
		loaddebug(next, 0);
		loaddebug(next, 1);
		loaddebug(next, 2);
		loaddebug(next, 3);
		/* no 4 and 5 */
		loaddebug(next, 6);
		loaddebug(next, 7);
	}


	/*
	 * Handle the IO bitmap
	 */
	if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
		if (next->io_bitmap_ptr)
			/*
			 * Copy the relevant range of the IO bitmap.
			 * Normally this is 128 bytes or less:
			 */
			memcpy(tss->io_bitmap, next->io_bitmap_ptr,
				max(prev->io_bitmap_max, next->io_bitmap_max));
		else {
			/*
			 * Clear any possible leftover bits:
			 */
			memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
		}
	}

	return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
		char __user * __user *envp, struct pt_regs regs)
{
	long error;
	char *filename;

	filename = getname(name);
	error = PTR_ERR(filename);
	if (IS_ERR(filename))
		return error;
	error = do_execve(filename, argv, envp, &regs);
	if (error == 0) {
		task_lock(current);
		current->ptrace &= ~PT_DTRACE;
		task_unlock(current);
	}
	putname(filename);
	return error;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);

	/*
	 * TBD: this overwrites the user setup. Should have two bits.
	 * But 64bit processes have always behaved this way, so it's
	 * not too bad. The main problem is just that 32bit children
	 * are affected again.
	 */
	current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
	return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
}

asmlinkage long
sys_clone(unsigned long clone_flags, unsigned long newsp,
	  void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
	if (!newsp)
		newsp = regs->rsp;
	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->rsp, regs, 0,
		    NULL, NULL);
}

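/*
 * get_wchan() walks the saved frame pointers on a sleeping task's
 * kernel stack and returns the first return address that is not in
 * the scheduler itself, i.e. the place the task is blocked; bounded
 * to 16 frames in case the chain is corrupt.
 */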
unsigned long get_wchan(struct task_struct *p)
{
	unsigned long stack;
	u64 fp, rip;
	int count = 0;

	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
	stack = (unsigned long)p->thread_info;
	if (p->thread.rsp < stack || p->thread.rsp > stack+THREAD_SIZE)
		return 0;
	fp = *(u64 *)(p->thread.rsp);
	do {
		if (fp < (unsigned long)stack ||
		    fp > (unsigned long)stack+THREAD_SIZE)
			return 0;
		rip = *(u64 *)(fp+8);
		if (!in_sched_functions(rip))
			return rip;
		fp = *(u64 *)fp;
	} while (count++ < 16);
	return 0;
}

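/*
 * do_arch_prctl() implements ARCH_SET_FS/ARCH_SET_GS and
 * ARCH_GET_FS/ARCH_GET_GS: setting or querying a task's 64-bit FS/GS
 * base, via the GDT TLS slots for small (32-bit) bases and via the
 * FS_BASE/KERNEL_GS_BASE MSRs for large ones.
 */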
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
	int ret = 0;
	int doit = task == current;
	int cpu;

	switch (code) {
	case ARCH_SET_GS:
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, GS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				load_gs_index(GS_TLS_SEL);
			}
			task->thread.gsindex = GS_TLS_SEL;
			task->thread.gs = 0;
		} else {
			task->thread.gsindex = 0;
			task->thread.gs = addr;
			if (doit) {
				load_gs_index(0);
				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, FS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				asm volatile("movl %0,%%fs" :: "r"(FS_TLS_SEL));
			}
			task->thread.fsindex = FS_TLS_SEL;
			task->thread.fs = 0;
		} else {
			task->thread.fsindex = 0;
			task->thread.fs = addr;
			if (doit) {
				/* set the selector to 0 to not confuse
				   __switch_to */
				asm volatile("movl %0,%%fs" :: "r" (0));
				ret = checking_wrmsrl(MSR_FS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;
		if (task->thread.fsindex == FS_TLS_SEL)
			base = read_32bit_tls(task, FS_TLS);
		else if (doit)
			rdmsrl(MSR_FS_BASE, base);
		else
			base = task->thread.fs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;
		if (task->thread.gsindex == GS_TLS_SEL)
			base = read_32bit_tls(task, GS_TLS);
		else if (doit)
			rdmsrl(MSR_KERNEL_GS_BASE, base);
		else
			base = task->thread.gs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}

/*
 * Capture the user space registers if the task is not running (in user space).
 */
int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
{
	struct pt_regs *pp, ptregs;

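	/*
	 * The user-mode registers are saved in a pt_regs frame at the
	 * top of the kernel stack, just below thread.rsp0.
	 */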
	pp = (struct pt_regs *)(tsk->thread.rsp0);
	--pp;

	ptregs = *pp;
	ptregs.cs &= 0xffff;
	ptregs.ss &= 0xffff;

	elf_core_copy_regs(regs, &ptregs);

	return 1;
}

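/*
 * Randomize the top of the user stack by up to 8KB (when va-space
 * randomization is enabled), keeping it 16-byte aligned for the ABI.
 */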
unsigned long arch_align_stack(unsigned long sp)
{
	if (randomize_va_space)
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}