/*
 * linux/arch/x86-64/kernel/process.c
 *
 * Copyright (C) 1995 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * Gareth Hughes <gareth@valinux.com>, May 2000
 *
 * X86-64 port
 * Andi Kleen.
 *
 * CPU hotplug support - ashok.raj@intel.com
 * $Id: process.c,v 1.38 2002/01/15 10:08:03 ak Exp $
 */

/*
 * This file handles the architecture-dependent parts of process handling..
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/module.h>
#include <linux/a.out.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/irq.h>
#include <linux/ptrace.h>
#include <linux/utsname.h>
#include <linux/random.h>
#include <linux/kprobes.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/kdebug.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>

asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

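/*
 * disable_hlt() bumps this counter and enable_hlt() drops it again;
 * default_idle() only uses hlt while it is zero.
 */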
static atomic_t hlt_counter = ATOMIC_INIT(0);

unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);

/*
 * Power management idle function, if any.
 */
void (*pm_idle)(void);
static DEFINE_PER_CPU(unsigned int, cpu_idle_state);

void disable_hlt(void)
{
	atomic_inc(&hlt_counter);
}

EXPORT_SYMBOL(disable_hlt);

void enable_hlt(void)
{
	atomic_dec(&hlt_counter);
}

EXPORT_SYMBOL(enable_hlt);

/*
 * We use this if we don't have any better
 * idle routine..
 */
void default_idle(void)
{
	if (!atomic_read(&hlt_counter)) {
		local_irq_disable();
		if (!need_resched())
			safe_halt();
		else
			local_irq_enable();
	}
}

/*
 * On SMP it's slightly faster (but much more power-consuming!)
 * to poll the ->need_resched flag instead of waiting for the
 * cross-CPU IPI to arrive. Use this option with caution.
 */
static void poll_idle (void)
{
	int oldval;

	local_irq_enable();

	/*
	 * Deal with another CPU just having chosen a thread to
	 * run here:
	 */
	oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);

	if (!oldval) {
		set_thread_flag(TIF_POLLING_NRFLAG);
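		/* Busy-wait with PAUSE (rep; nop) until somebody sets
		   TIF_NEED_RESCHED for us. */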
		asm volatile(
			"2:"
			"testl %0,%1;"
			"rep; nop;"
			"je 2b;"
			: :
			"i" (_TIF_NEED_RESCHED),
			"m" (current_thread_info()->flags));
	} else {
		set_need_resched();
	}
}

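/*
 * Wait until every online CPU has passed through its idle loop once, so
 * that a changed pm_idle handler has been observed everywhere.
 */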
void cpu_idle_wait(void)
{
	unsigned int cpu, this_cpu = get_cpu();
	cpumask_t map;

	set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
	put_cpu();

	cpus_clear(map);
	for_each_online_cpu(cpu) {
		per_cpu(cpu_idle_state, cpu) = 1;
		cpu_set(cpu, map);
	}

	__get_cpu_var(cpu_idle_state) = 0;

	wmb();
	do {
		ssleep(1);
		for_each_online_cpu(cpu) {
			if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
				cpu_clear(cpu, map);
		}
		cpus_and(map, map, cpu_online_map);
	} while (!cpus_empty(map));
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);

#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/* We don't actually take the CPU down, just spin without interrupts. */
static inline void play_dead(void)
{
	idle_task_exit();
	wbinvd();
	mb();
	/* Ack it */
	__get_cpu_var(cpu_state) = CPU_DEAD;

	while (1)
		safe_halt();
}
#else
static inline void play_dead(void)
{
	BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle (void)
{
	/* endless idle loop with no priority at all */
	while (1) {
		while (!need_resched()) {
			void (*idle)(void);

			if (__get_cpu_var(cpu_idle_state))
				__get_cpu_var(cpu_idle_state) = 0;

			rmb();
			idle = pm_idle;
			if (!idle)
				idle = default_idle;
			if (cpu_is_offline(smp_processor_id()))
				play_dead();
			idle();
		}

		schedule();
	}
}

/*
 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
 * which can obviate IPI to trigger checking of need_resched.
 * We execute MONITOR against need_resched and enter optimized wait state
 * through MWAIT. Whenever someone changes need_resched, we would be woken
 * up from MWAIT (without an IPI).
 */
static void mwait_idle(void)
{
	local_irq_enable();

	if (!need_resched()) {
		set_thread_flag(TIF_POLLING_NRFLAG);
		do {
			__monitor((void *)&current_thread_info()->flags, 0, 0);
			if (need_resched())
				break;
			__mwait(0, 0);
		} while (!need_resched());
		clear_thread_flag(TIF_POLLING_NRFLAG);
	}
}

void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
	static int printed;
	if (cpu_has(c, X86_FEATURE_MWAIT)) {
		/*
		 * Skip, if setup has overridden idle.
		 * One CPU supports mwait => all CPUs support mwait.
		 */
		if (!pm_idle) {
			if (!printed) {
				printk("using mwait in idle threads.\n");
				printed = 1;
			}
			pm_idle = mwait_idle;
		}
	}
}

static int __init idle_setup (char *str)
{
	if (!strncmp(str, "poll", 4)) {
		printk("using polling idle threads.\n");
		pm_idle = poll_idle;
	}

	boot_option_idle_override = 1;
	return 1;
}

__setup("idle=", idle_setup);

/* Also prints some state that isn't saved in the pt_regs. */
void __show_regs(struct pt_regs * regs)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;

	printk("\n");
	print_modules();
	printk("Pid: %d, comm: %.20s %s %s %.*s\n",
		current->pid, current->comm, print_tainted(),
		system_utsname.release,
		(int)strcspn(system_utsname.version, " "),
		system_utsname.version);
	printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
	printk_address(regs->rip);
	printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, regs->eflags);
	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->rax, regs->rbx, regs->rcx);
	printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->rdx, regs->rsi, regs->rdi);
	printk("RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->rbp, regs->r8, regs->r9);
	printk("R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk("R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

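	/* Segment selectors and control registers aren't part of pt_regs,
	   so read them directly from the hardware. */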
	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	asm("movq %%cr0, %0": "=r" (cr0));
	asm("movq %%cr2, %0": "=r" (cr2));
	asm("movq %%cr3, %0": "=r" (cr3));
	asm("movq %%cr4, %0": "=r" (cr4));

	printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs, fsindex, gs, gsindex, shadowgs);
	printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
	printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
}

void show_regs(struct pt_regs *regs)
{
	printk("CPU %d:", smp_processor_id());
	__show_regs(regs);
	show_trace(&regs->rsp);
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
	struct task_struct *me = current;
	struct thread_struct *t = &me->thread;

	/*
	 * Remove function-return probe instances associated with this task
	 * and put them back on the free list. Do not insert an exit probe for
	 * this function, it will be disabled by kprobe_flush_task if you do.
	 */
	kprobe_flush_task(me);

	if (me->thread.io_bitmap_ptr) {
		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

		kfree(t->io_bitmap_ptr);
		t->io_bitmap_ptr = NULL;
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
		t->io_bitmap_max = 0;
		put_cpu();
	}
}

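/*
 * Reset per-thread state (ABI flag, debug registers, TLS entries and FPU
 * state) when the task starts executing a new program.
 */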
void flush_thread(void)
{
	struct task_struct *tsk = current;
	struct thread_info *t = current_thread_info();

	/*
	 * Remove function-return probe instances associated with this task
	 * and put them back on the free list. Do not insert an exit probe for
	 * this function, it will be disabled by kprobe_flush_task if you do.
	 */
	kprobe_flush_task(tsk);

	if (t->flags & _TIF_ABI_PENDING)
		t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);

	tsk->thread.debugreg0 = 0;
	tsk->thread.debugreg1 = 0;
	tsk->thread.debugreg2 = 0;
	tsk->thread.debugreg3 = 0;
	tsk->thread.debugreg6 = 0;
	tsk->thread.debugreg7 = 0;
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
	/*
	 * Forget coprocessor state..
	 */
	clear_fpu(tsk);
	clear_used_math();
}

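/* A task being reaped must not leave an LDT behind; complain and BUG if it does. */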
void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
		if (dead_task->mm->context.size) {
			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
					dead_task->comm,
					dead_task->mm->context.ldt,
					dead_task->mm->context.size);
			BUG();
		}
	}
}

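/*
 * Helpers for the small (32-bit) fs/gs bases that are kept in GDT TLS
 * descriptors rather than in the FS_BASE/GS_BASE MSRs.
 */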
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
	struct user_desc ud = {
		.base_addr = addr,
		.limit = 0xfffff,
		.seg_32bit = 1,
		.limit_in_pages = 1,
		.useable = 1,
	};
	struct n_desc_struct *desc = (void *)t->thread.tls_array;
	desc += tls;
	desc->a = LDT_entry_a(&ud);
	desc->b = LDT_entry_b(&ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
	struct desc_struct *desc = (void *)t->thread.tls_array;
	desc += tls;
	return desc->base0 |
		(((u32)desc->base1) << 16) |
		(((u32)desc->base2) << 24);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

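/*
 * Set up the kernel stack, saved registers, segment state and I/O bitmap
 * for a newly forked task.
 */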
int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
		unsigned long unused,
		struct task_struct * p, struct pt_regs * regs)
{
	int err;
	struct pt_regs * childregs;
	struct task_struct *me = current;

	childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;

	*childregs = *regs;

	childregs->rax = 0;
	childregs->rsp = rsp;
	if (rsp == ~0UL) {
		childregs->rsp = (unsigned long)childregs;
	}

	p->thread.rsp = (unsigned long) childregs;
	p->thread.rsp0 = (unsigned long) (childregs+1);
	p->thread.userrsp = me->thread.userrsp;

	set_ti_thread_flag(p->thread_info, TIF_FORK);

	p->thread.fs = me->thread.fs;
	p->thread.gs = me->thread.gs;

	asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
	asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
	asm("mov %%es,%0" : "=m" (p->thread.es));
	asm("mov %%ds,%0" : "=m" (p->thread.ds));

	if (unlikely(me->thread.io_bitmap_ptr != NULL)) {
		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, IO_BITMAP_BYTES);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (test_thread_flag(TIF_IA32))
			err = ia32_child_tls(p, childregs);
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
		if (err)
			goto out;
	}
	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
	return err;
}

/*
 * This function decides whether the context switch from prev to next
 * has to tweak the TSC disable bit in cr4.
 */
static inline void disable_tsc(struct task_struct *prev_p,
			       struct task_struct *next_p)
{
	struct thread_info *prev, *next;

	/*
	 * gcc should eliminate the ->thread_info dereference if
	 * has_secure_computing returns 0 at compile time (SECCOMP=n).
	 */
	prev = prev_p->thread_info;
	next = next_p->thread_info;

	if (has_secure_computing(prev) || has_secure_computing(next)) {
		/* slow path here */
		if (has_secure_computing(prev) &&
		    !has_secure_computing(next)) {
			write_cr4(read_cr4() & ~X86_CR4_TSD);
		} else if (!has_secure_computing(prev) &&
			   has_secure_computing(next))
			write_cr4(read_cr4() | X86_CR4_TSD);
	}
}

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread,r) set_debug(thread->debugreg ## r, r)

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 */
struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread,
			     *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);

	unlazy_fpu(prev_p);

	/*
	 * Reload esp0, LDT and the page table pointer:
	 */
	tss->rsp0 = next->rsp0;

	/*
	 * Switch DS and ES.
	 * This won't pick up thread selector changes, but I guess that is ok.
	 */
	asm volatile("mov %%es,%0" : "=m" (prev->es));
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

	load_TLS(next, cpu);

	/*
	 * Switch FS and GS.
	 */
	{
		unsigned fsindex;
		asm volatile("movl %%fs,%0" : "=r" (fsindex));
		/*
		 * A segment register != 0 always requires a reload; also
		 * reload when it has changed.  When the previous process
		 * used a 64-bit base, always reload to avoid an
		 * information leak.
		 */
		if (unlikely(fsindex | next->fsindex | prev->fs)) {
			loadsegment(fs, next->fsindex);
			/*
			 * Check if the user used a selector != 0; if so,
			 * clear the 64-bit base, since the overloaded base
			 * is always mapped to the null selector.
			 */
			if (fsindex)
				prev->fs = 0;
		}
		/* When the next process has a 64-bit base, use it. */
		if (next->fs)
			wrmsrl(MSR_FS_BASE, next->fs);
		prev->fsindex = fsindex;
	}
	{
		unsigned gsindex;
		asm volatile("movl %%gs,%0" : "=r" (gsindex));
		if (unlikely(gsindex | next->gsindex | prev->gs)) {
			load_gs_index(next->gsindex);
			if (gsindex)
				prev->gs = 0;
		}
		if (next->gs)
			wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
		prev->gsindex = gsindex;
	}

	/*
	 * Switch the PDA context.
	 */
	prev->userrsp = read_pda(oldrsp);
	write_pda(oldrsp, next->userrsp);
	write_pda(pcurrent, next_p);
	write_pda(kernelstack, (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET);

	/*
	 * Now maybe reload the debug registers
	 */
	if (unlikely(next->debugreg7)) {
		loaddebug(next, 0);
		loaddebug(next, 1);
		loaddebug(next, 2);
		loaddebug(next, 3);
		/* no 4 and 5 */
		loaddebug(next, 6);
		loaddebug(next, 7);
	}

	/*
	 * Handle the IO bitmap
	 */
	if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
		if (next->io_bitmap_ptr)
			/*
			 * Copy the relevant range of the IO bitmap.
			 * Normally this is 128 bytes or less:
			 */
			memcpy(tss->io_bitmap, next->io_bitmap_ptr,
				max(prev->io_bitmap_max, next->io_bitmap_max));
		else {
			/*
			 * Clear any possible leftover bits:
			 */
			memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
		}
	}

	disable_tsc(prev_p, next_p);

	return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
		char __user * __user *envp, struct pt_regs regs)
{
	long error;
	char * filename;

	filename = getname(name);
	error = PTR_ERR(filename);
	if (IS_ERR(filename))
		return error;
	error = do_execve(filename, argv, envp, &regs);
	if (error == 0) {
		task_lock(current);
		current->ptrace &= ~PT_DTRACE;
		task_unlock(current);
	}
	putname(filename);
	return error;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64-bit mode */
	clear_thread_flag(TIF_IA32);

	/* TBD: this overwrites the user's setup. Should have two bits.
	   But 64-bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
	   32-bit children are affected again. */
	current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
	return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
}

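/* clone(2) entry point: a new stack pointer of 0 means "reuse the parent's". */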
asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp, void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
	if (!newsp)
		newsp = regs->rsp;
	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->rsp, regs, 0,
		    NULL, NULL);
}

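/*
 * Return the address a blocked task is sleeping at (its "wait channel") by
 * walking the saved frame pointers on its kernel stack, skipping scheduler
 * functions.
 */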
unsigned long get_wchan(struct task_struct *p)
{
	unsigned long stack;
	u64 fp, rip;
	int count = 0;

	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
	stack = (unsigned long)p->thread_info;
	if (p->thread.rsp < stack || p->thread.rsp > stack+THREAD_SIZE)
		return 0;
	fp = *(u64 *)(p->thread.rsp);
	do {
		if (fp < (unsigned long)stack || fp > (unsigned long)stack+THREAD_SIZE)
			return 0;
		rip = *(u64 *)(fp+8);
		if (!in_sched_functions(rip))
			return rip;
		fp = *(u64 *)fp;
	} while (count++ < 16);
	return 0;
}

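/*
 * Backend for arch_prctl(2): set or read the FS/GS base of a task.  Small
 * bases go into a GDT TLS descriptor, large ones into the corresponding MSR.
 */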
long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
	int ret = 0;
	int doit = task == current;
	int cpu;

	switch (code) {
	case ARCH_SET_GS:
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, GS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				load_gs_index(GS_TLS_SEL);
			}
			task->thread.gsindex = GS_TLS_SEL;
			task->thread.gs = 0;
		} else {
			task->thread.gsindex = 0;
			task->thread.gs = addr;
			if (doit) {
				load_gs_index(0);
				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, FS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				asm volatile("movl %0,%%fs" :: "r" (FS_TLS_SEL));
			}
			task->thread.fsindex = FS_TLS_SEL;
			task->thread.fs = 0;
		} else {
			task->thread.fsindex = 0;
			task->thread.fs = addr;
			if (doit) {
				/* set the selector to 0 to not confuse
				   __switch_to */
				asm volatile("movl %0,%%fs" :: "r" (0));
				ret = checking_wrmsrl(MSR_FS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;
		if (task->thread.fsindex == FS_TLS_SEL)
			base = read_32bit_tls(task, FS_TLS);
		else if (doit) {
			rdmsrl(MSR_FS_BASE, base);
		} else
			base = task->thread.fs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;
		if (task->thread.gsindex == GS_TLS_SEL)
			base = read_32bit_tls(task, GS_TLS);
		else if (doit) {
			rdmsrl(MSR_KERNEL_GS_BASE, base);
		} else
			base = task->thread.gs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}

/*
 * Capture the user-space registers of a task that is not currently running
 * in user space.
 */
int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
{
	struct pt_regs *pp, ptregs;

	pp = (struct pt_regs *)(tsk->thread.rsp0);
	--pp;

	ptregs = *pp;
	ptregs.cs &= 0xffff;
	ptregs.ss &= 0xffff;

	elf_core_copy_regs(regs, &ptregs);

	return 1;
}

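/*
 * Randomize the top of the stack by up to 8k and keep it 16-byte aligned,
 * when address-space randomization is enabled.
 */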
unsigned long arch_align_stack(unsigned long sp)
{
	if (randomize_va_space)
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}