/*
 * linux/arch/x86-64/kernel/process.c
 *
 * Copyright (C) 1995 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 * X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 *
 * $Id: process.c,v 1.38 2002/01/15 10:08:03 ak Exp $
 */

/*
 * This file handles the architecture-dependent parts of process handling.
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/module.h>
#include <linux/a.out.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/irq.h>
#include <linux/ptrace.h>
#include <linux/utsname.h>
#include <linux/random.h>
#include <linux/kprobes.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/kdebug.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>

asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

static atomic_t hlt_counter = ATOMIC_INIT(0);

unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);

/*
 * Power management idle function, if any.
 */
void (*pm_idle)(void);
static DEFINE_PER_CPU(unsigned int, cpu_idle_state);

void disable_hlt(void)
{
	atomic_inc(&hlt_counter);
}

EXPORT_SYMBOL(disable_hlt);

void enable_hlt(void)
{
	atomic_dec(&hlt_counter);
}

EXPORT_SYMBOL(enable_hlt);

/*
 * We use this if we don't have any better
 * idle routine.
 */
void default_idle(void)
{
	if (!atomic_read(&hlt_counter)) {
		local_irq_disable();
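		/*
		 * safe_halt() re-enables interrupts and halts atomically
		 * ("sti; hlt"), so a wakeup arriving after the check below
		 * cannot be lost.
		 */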
		if (!need_resched())
			safe_halt();
		else
			local_irq_enable();
	}
}

/*
 * On SMP it's slightly faster (but much more power-consuming!)
 * to poll the ->need_resched flag instead of waiting for the
 * cross-CPU IPI to arrive. Use this option with caution.
 */
static void poll_idle(void)
{
	int oldval;

	local_irq_enable();

	/*
	 * Deal with another CPU just having chosen a thread to
	 * run here:
	 */
	oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);

	if (!oldval) {
		set_thread_flag(TIF_POLLING_NRFLAG);
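		/*
		 * "rep; nop" is the PAUSE hint: spin cheaply on the flags
		 * word until TIF_NEED_RESCHED is set again.
		 */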
		asm volatile(
			"2:"
			"testl %0,%1;"
			"rep; nop;"
			"je 2b;"
			: :
			"i" (_TIF_NEED_RESCHED),
			"m" (current_thread_info()->flags));
	} else {
		set_need_resched();
	}
}

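/*
 * Wait until every online CPU has passed through its idle loop at least
 * once, so that a newly installed pm_idle handler is guaranteed to be in
 * use everywhere.  Callers change pm_idle first and then invoke this.
 */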
void cpu_idle_wait(void)
{
	unsigned int cpu, this_cpu = get_cpu();
	cpumask_t map;

	set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
	put_cpu();

	cpus_clear(map);
	for_each_online_cpu(cpu) {
		per_cpu(cpu_idle_state, cpu) = 1;
		cpu_set(cpu, map);
	}

	__get_cpu_var(cpu_idle_state) = 0;

	wmb();
	do {
		ssleep(1);
		for_each_online_cpu(cpu) {
			if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
				cpu_clear(cpu, map);
		}
		cpus_and(map, map, cpu_online_map);
	} while (!cpus_empty(map));
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);

#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/* We don't actually take the CPU down, just spin without interrupts. */
static inline void play_dead(void)
{
	idle_task_exit();
	wbinvd();
	mb();
	/* Ack it */
	__get_cpu_var(cpu_state) = CPU_DEAD;

	while (1)
		safe_halt();
}
#else
static inline void play_dead(void)
{
	BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (i.e. sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle(void)
{
	/* endless idle loop with no priority at all */
	while (1) {
		while (!need_resched()) {
			void (*idle)(void);

			if (__get_cpu_var(cpu_idle_state))
				__get_cpu_var(cpu_idle_state) = 0;

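			/*
			 * rmb() orders the cpu_idle_state access above
			 * against the pm_idle load below; it pairs with
			 * the wmb() in cpu_idle_wait().
			 */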
			rmb();
			idle = pm_idle;
			if (!idle)
				idle = default_idle;
			if (cpu_is_offline(smp_processor_id()))
				play_dead();
			idle();
		}

		schedule();
	}
}

/*
 * This uses the new MONITOR/MWAIT instructions on P4 processors with PNI,
 * which can obviate the IPI needed to trigger checking of need_resched.
 * We execute MONITOR against need_resched and enter an optimized wait
 * state through MWAIT.  Whenever someone changes need_resched, we are
 * woken up from MWAIT (without an IPI).
 */
static void mwait_idle(void)
{
	local_irq_enable();

	if (!need_resched()) {
		set_thread_flag(TIF_POLLING_NRFLAG);
		do {
			__monitor((void *)&current_thread_info()->flags, 0, 0);
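			/*
			 * A need_resched event from before MONITOR was armed
			 * would not wake MWAIT, so re-check here; any later
			 * write to the monitored flags word will.
			 */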
			if (need_resched())
				break;
			__mwait(0, 0);
		} while (!need_resched());
		clear_thread_flag(TIF_POLLING_NRFLAG);
	}
}

void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
	static int printed;
	if (cpu_has(c, X86_FEATURE_MWAIT)) {
		/*
		 * Skip if setup has overridden idle.
		 * If one CPU supports mwait, all CPUs support mwait.
		 */
		if (!pm_idle) {
			if (!printed) {
				printk("using mwait in idle threads.\n");
				printed = 1;
			}
			pm_idle = mwait_idle;
		}
	}
}

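/*
 * Handle the "idle=" boot parameter; only "idle=poll" is recognized here.
 * boot_option_idle_override records that the user forced an idle policy,
 * so other code does not replace it later.
 */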
static int __init idle_setup(char *str)
{
	if (!strncmp(str, "poll", 4)) {
		printk("using polling idle threads.\n");
		pm_idle = poll_idle;
	}

	boot_option_idle_override = 1;
	return 1;
}

__setup("idle=", idle_setup);

/* Also prints some state that isn't saved in the pt_regs. */
void __show_regs(struct pt_regs *regs)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned int fsindex, gsindex;
	unsigned int ds, cs, es;

	printk("\n");
	print_modules();
	printk("Pid: %d, comm: %.20s %s %s\n",
	       current->pid, current->comm, print_tainted(), system_utsname.release);
	printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
	printk_address(regs->rip);
	printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, regs->eflags);
	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->rax, regs->rbx, regs->rcx);
	printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->rdx, regs->rsi, regs->rdi);
	printk("RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->rbp, regs->r8, regs->r9);
	printk("R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk("R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

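	/*
	 * Selector values are read from the segment registers; the 64-bit
	 * fs/gs base addresses live in MSRs.
	 */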
290 asm("movl %%ds,%0" : "=r" (ds));
291 asm("movl %%cs,%0" : "=r" (cs));
292 asm("movl %%es,%0" : "=r" (es));
293 asm("movl %%fs,%0" : "=r" (fsindex));
294 asm("movl %%gs,%0" : "=r" (gsindex));
295
296 rdmsrl(MSR_FS_BASE, fs);
297 rdmsrl(MSR_GS_BASE, gs);
298 rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);
299
300 asm("movq %%cr0, %0": "=r" (cr0));
301 asm("movq %%cr2, %0": "=r" (cr2));
302 asm("movq %%cr3, %0": "=r" (cr3));
303 asm("movq %%cr4, %0": "=r" (cr4));
304
305 printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
306 fs,fsindex,gs,gsindex,shadowgs);
307 printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
308 printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
309}
310
void show_regs(struct pt_regs *regs)
{
	printk("CPU %d:", smp_processor_id());
	__show_regs(regs);
	show_trace(&regs->rsp);
}

/*
 * Free current thread data structures etc.
 */
void exit_thread(void)
{
	struct task_struct *me = current;
	struct thread_struct *t = &me->thread;

	/*
	 * Remove function-return probe instances associated with this task
	 * and put them back on the free list.  Do not insert an exit probe
	 * for this function; it will be disabled by kprobe_flush_task if
	 * you do.
	 */
	kprobe_flush_task(me);

	if (me->thread.io_bitmap_ptr) {
		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

		kfree(t->io_bitmap_ptr);
		t->io_bitmap_ptr = NULL;
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
		t->io_bitmap_max = 0;
		put_cpu();
	}
}

void flush_thread(void)
{
	struct task_struct *tsk = current;
	struct thread_info *t = current_thread_info();

	/*
	 * Remove function-return probe instances associated with this task
	 * and put them back on the free list.  Do not insert an exit probe
	 * for this function; it will be disabled by kprobe_flush_task if
	 * you do.
	 */
	kprobe_flush_task(tsk);

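	/*
	 * _TIF_ABI_PENDING means the exec'ed binary uses the other ABI:
	 * clear the pending flag and flip TIF_IA32 to match.
	 */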
	if (t->flags & _TIF_ABI_PENDING)
		t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);

	tsk->thread.debugreg0 = 0;
	tsk->thread.debugreg1 = 0;
	tsk->thread.debugreg2 = 0;
	tsk->thread.debugreg3 = 0;
	tsk->thread.debugreg6 = 0;
	tsk->thread.debugreg7 = 0;
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
	/*
	 * Forget coprocessor state.
	 */
	clear_fpu(tsk);
	clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
		if (dead_task->mm->context.size) {
			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
				dead_task->comm,
				dead_task->mm->context.ldt,
				dead_task->mm->context.size);
			BUG();
		}
	}
}

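/*
 * Small (< 4GB) fs/gs bases for 32-bit tasks are kept in GDT descriptors
 * in tls_array instead of the 64-bit base MSRs; these helpers install
 * and read back such a descriptor.
 */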
static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
	struct user_desc ud = {
		.base_addr = addr,
		.limit = 0xfffff,
		.seg_32bit = 1,
		.limit_in_pages = 1,
		.useable = 1,
	};
	struct n_desc_struct *desc = (void *)t->thread.tls_array;
	desc += tls;
	desc->a = LDT_entry_a(&ud);
	desc->b = LDT_entry_b(&ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
	struct desc_struct *desc = (void *)t->thread.tls_array;
	desc += tls;
	return desc->base0 |
		(((u32)desc->base1) << 16) |
		(((u32)desc->base2) << 24);
}

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

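/*
 * Set up the child's kernel stack: a pt_regs frame at the top (with rax
 * cleared so the child returns 0 from fork) and the thread fields that
 * __switch_to and ret_from_fork rely on.
 */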
int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
		unsigned long unused,
		struct task_struct *p, struct pt_regs *regs)
{
	int err;
	struct pt_regs *childregs;
	struct task_struct *me = current;

	childregs = ((struct pt_regs *)(THREAD_SIZE + (unsigned long)p->thread_info)) - 1;

	*childregs = *regs;

	childregs->rax = 0;
	childregs->rsp = rsp;
	if (rsp == ~0UL) {
		childregs->rsp = (unsigned long)childregs;
	}

	p->thread.rsp = (unsigned long)childregs;
	p->thread.rsp0 = (unsigned long)(childregs + 1);
	p->thread.userrsp = me->thread.userrsp;

	set_ti_thread_flag(p->thread_info, TIF_FORK);

	p->thread.fs = me->thread.fs;
	p->thread.gs = me->thread.gs;

	asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
	asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
	asm("mov %%es,%0" : "=m" (p->thread.es));
	asm("mov %%ds,%0" : "=m" (p->thread.ds));

	if (unlikely(me->thread.io_bitmap_ptr != NULL)) {
		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr,
		       IO_BITMAP_BYTES);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (test_thread_flag(TIF_IA32))
			err = ia32_child_tls(p, childregs);
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
		if (err)
			goto out;
	}
	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
	return err;
}

/*
 * This function decides whether the context switch from prev to next
 * needs to toggle the TSC disable bit in CR4.
 */
static inline void disable_tsc(struct task_struct *prev_p,
			       struct task_struct *next_p)
{
	struct thread_info *prev, *next;

	/*
	 * gcc should eliminate the ->thread_info dereference if
	 * has_secure_computing returns 0 at compile time (SECCOMP=n).
	 */
	prev = prev_p->thread_info;
	next = next_p->thread_info;

	if (has_secure_computing(prev) || has_secure_computing(next)) {
		/* slow path here */
		if (has_secure_computing(prev) &&
		    !has_secure_computing(next)) {
			write_cr4(read_cr4() & ~X86_CR4_TSD);
		} else if (!has_secure_computing(prev) &&
			   has_secure_computing(next))
			write_cr4(read_cr4() | X86_CR4_TSD);
	}
}

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread, r) set_debug(thread->debugreg ## r, r)

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 */
struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread,
			     *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);

	unlazy_fpu(prev_p);

	/*
	 * Reload rsp0 in the per-cpu TSS:
	 */
	tss->rsp0 = next->rsp0;

	/*
	 * Switch DS and ES.
	 * This won't pick up thread selector changes, but I guess that is ok.
	 */
	asm volatile("mov %%es,%0" : "=m" (prev->es));
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	asm volatile("mov %%ds,%0" : "=m" (prev->ds));
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

	load_TLS(next, cpu);

	/*
	 * Switch FS and GS.
	 */
	{
		unsigned fsindex;
		asm volatile("movl %%fs,%0" : "=r" (fsindex));
		/*
		 * A segment register != 0 always requires a reload.
		 * Also reload when it has changed.
		 * When the previous process used a 64-bit base, always
		 * reload to avoid an information leak.
		 */
		if (unlikely(fsindex | next->fsindex | prev->fs)) {
			loadsegment(fs, next->fsindex);
			/*
			 * Check if the user used a selector != 0; if yes,
			 * clear the 64-bit base, since the overloaded base
			 * is always mapped to the null selector.
			 */
			if (fsindex)
				prev->fs = 0;
		}
		/* When the next process has a 64-bit base, use it. */
		if (next->fs)
			wrmsrl(MSR_FS_BASE, next->fs);
		prev->fsindex = fsindex;
	}
	{
		unsigned gsindex;
		asm volatile("movl %%gs,%0" : "=r" (gsindex));
		if (unlikely(gsindex | next->gsindex | prev->gs)) {
			load_gs_index(next->gsindex);
			if (gsindex)
				prev->gs = 0;
		}
		if (next->gs)
			wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
		prev->gsindex = gsindex;
	}

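	/*
	 * The PDA is the kernel's per-cpu data area, reached via %gs in
	 * kernel mode; it caches the current task and the top of its
	 * kernel stack.
	 */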
	/*
	 * Switch the PDA context.
	 */
	prev->userrsp = read_pda(oldrsp);
	write_pda(oldrsp, next->userrsp);
	write_pda(pcurrent, next_p);
	write_pda(kernelstack,
		  (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET);

	/*
	 * Now maybe reload the debug registers
	 */
	if (unlikely(next->debugreg7)) {
		loaddebug(next, 0);
		loaddebug(next, 1);
		loaddebug(next, 2);
		loaddebug(next, 3);
		/* no 4 and 5 */
		loaddebug(next, 6);
		loaddebug(next, 7);
	}

	/*
	 * Handle the IO bitmap
	 */
	if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
		if (next->io_bitmap_ptr)
			/*
			 * Copy the relevant range of the IO bitmap.
			 * Normally this is 128 bytes or less:
			 */
			memcpy(tss->io_bitmap, next->io_bitmap_ptr,
			       max(prev->io_bitmap_max, next->io_bitmap_max));
		else {
			/*
			 * Clear any possible leftover bits:
			 */
			memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
		}
	}

	disable_tsc(prev_p, next_p);

	return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
		char __user * __user *envp, struct pt_regs regs)
{
	long error;
	char *filename;

	filename = getname(name);
	error = PTR_ERR(filename);
	if (IS_ERR(filename))
		return error;
	error = do_execve(filename, argv, envp, &regs);
	if (error == 0) {
		task_lock(current);
		current->ptrace &= ~PT_DTRACE;
		task_unlock(current);
	}
	putname(filename);
	return error;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64-bit mode */
	clear_thread_flag(TIF_IA32);

	/* TBD: overwrites user setup. Should have two bits.
	   But 64-bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
	   32-bit children are affected again. */
	current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
	return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
}

asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp,
			  void __user *parent_tid, void __user *child_tid,
			  struct pt_regs *regs)
{
	if (!newsp)
		newsp = regs->rsp;
	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->rsp, regs, 0,
		       NULL, NULL);
}

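/*
 * get_wchan: walk the frame-pointer chain of a sleeping task to find the
 * first return address outside the scheduler (the task's "wait channel").
 */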
unsigned long get_wchan(struct task_struct *p)
{
	unsigned long stack;
	u64 fp, rip;
	int count = 0;

	if (!p || p == current || p->state == TASK_RUNNING)
		return 0;
	stack = (unsigned long)p->thread_info;
	if (p->thread.rsp < stack || p->thread.rsp > stack + THREAD_SIZE)
		return 0;
	fp = *(u64 *)(p->thread.rsp);
	do {
		if (fp < (unsigned long)stack ||
		    fp > (unsigned long)stack + THREAD_SIZE)
			return 0;
		rip = *(u64 *)(fp + 8);
		if (!in_sched_functions(rip))
			return rip;
		fp = *(u64 *)fp;
	} while (count++ < 16);
	return 0;
}

long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
	int ret = 0;
	int doit = task == current;
	int cpu;

	switch (code) {
	case ARCH_SET_GS:
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, GS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				load_gs_index(GS_TLS_SEL);
			}
			task->thread.gsindex = GS_TLS_SEL;
			task->thread.gs = 0;
		} else {
			task->thread.gsindex = 0;
			task->thread.gs = addr;
			if (doit) {
				load_gs_index(0);
				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, FS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				asm volatile("movl %0,%%fs" :: "r" (FS_TLS_SEL));
			}
			task->thread.fsindex = FS_TLS_SEL;
			task->thread.fs = 0;
		} else {
			task->thread.fsindex = 0;
			task->thread.fs = addr;
			if (doit) {
				/* set the selector to 0 to not confuse
				   __switch_to */
				asm volatile("movl %0,%%fs" :: "r" (0));
				ret = checking_wrmsrl(MSR_FS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;
		if (task->thread.fsindex == FS_TLS_SEL)
			base = read_32bit_tls(task, FS_TLS);
		else if (doit) {
			rdmsrl(MSR_FS_BASE, base);
		} else
			base = task->thread.fs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;
		if (task->thread.gsindex == GS_TLS_SEL)
			base = read_32bit_tls(task, GS_TLS);
		else if (doit) {
			rdmsrl(MSR_KERNEL_GS_BASE, base);
		} else
			base = task->thread.gs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}
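
/*
 * Illustrative userspace usage (a sketch only, not kernel code; assumes
 * glibc's syscall() wrapper and the ARCH_* constants from <asm/prctl.h>):
 *
 *	unsigned long base;
 *	syscall(SYS_arch_prctl, ARCH_SET_GS, 0x1000UL);
 *	syscall(SYS_arch_prctl, ARCH_GET_GS, (unsigned long)&base);
 */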

/*
 * Capture the user-space registers if the task is not running (in user
 * space).
 */
int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
{
	struct pt_regs *pp, ptregs;

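	/*
	 * The saved user-space register frame sits at the top of the
	 * kernel stack, immediately below thread.rsp0.
	 */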
	pp = (struct pt_regs *)(tsk->thread.rsp0);
	--pp;

	ptregs = *pp;
	ptregs.cs &= 0xffff;
	ptregs.ss &= 0xffff;

	elf_core_copy_regs(regs, &ptregs);

	return 1;
}

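/*
 * Randomize the initial user stack pointer by up to 8KB while preserving
 * the 16-byte alignment the ABI expects.
 */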
unsigned long arch_align_stack(unsigned long sp)
{
	if (randomize_va_space)
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}