/*
 *  linux/arch/x86-64/kernel/process.c
 *
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *	Gareth Hughes <gareth@valinux.com>, May 2000
 *
 *  X86-64 port
 *	Andi Kleen.
 *
 *	CPU hotplug support - ashok.raj@intel.com
 * $Id: process.c,v 1.38 2002/01/15 10:08:03 ak Exp $
 */

/*
 * This file handles the architecture-dependent parts of process handling..
 */

#include <stdarg.h>

#include <linux/cpu.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/elfcore.h>
#include <linux/smp.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/module.h>
#include <linux/a.out.h>
#include <linux/interrupt.h>
#include <linux/delay.h>
#include <linux/irq.h>
#include <linux/ptrace.h>
#include <linux/utsname.h>
#include <linux/random.h>
#include <linux/kprobes.h>

#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/i387.h>
#include <asm/mmu_context.h>
#include <asm/pda.h>
#include <asm/prctl.h>
#include <asm/kdebug.h>
#include <asm/desc.h>
#include <asm/proto.h>
#include <asm/ia32.h>

asmlinkage extern void ret_from_fork(void);

unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;

static atomic_t hlt_counter = ATOMIC_INIT(0);

unsigned long boot_option_idle_override = 0;
EXPORT_SYMBOL(boot_option_idle_override);

/*
 * Powermanagement idle function, if any..
 */
void (*pm_idle)(void);
static DEFINE_PER_CPU(unsigned int, cpu_idle_state);

void disable_hlt(void)
{
	atomic_inc(&hlt_counter);
}

EXPORT_SYMBOL(disable_hlt);

void enable_hlt(void)
{
	atomic_dec(&hlt_counter);
}

EXPORT_SYMBOL(enable_hlt);
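
/*
 * Typical use of the hlt_counter above (illustrative, not taken from this
 * file): a driver that cannot tolerate the CPU halting, e.g. around a
 * timing-sensitive DMA sequence, brackets the region so that default_idle()
 * below returns without executing hlt:
 *
 *	disable_hlt();
 *	... timing-critical work ...
 *	enable_hlt();
 */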

/*
 * We use this if we don't have any better
 * idle routine..
 */
void default_idle(void)
{
	if (!atomic_read(&hlt_counter)) {
		local_irq_disable();
		if (!need_resched())
			safe_halt();
		else
			local_irq_enable();
	}
}

/*
 * On SMP it's slightly faster (but much more power-consuming!)
 * to poll the ->need_resched flag instead of waiting for the
 * cross-CPU IPI to arrive. Use this option with caution.
 */
static void poll_idle (void)
{
	int oldval;

	local_irq_enable();

	/*
	 * Deal with another CPU just having chosen a thread to
	 * run here:
	 */
	oldval = test_and_clear_thread_flag(TIF_NEED_RESCHED);

	if (!oldval) {
		set_thread_flag(TIF_POLLING_NRFLAG);
		asm volatile(
			"2:"
			"testl %0,%1;"
			"rep; nop;"
			"je 2b;"
			: :
			"i" (_TIF_NEED_RESCHED),
			"m" (current_thread_info()->flags));
	} else {
		set_need_resched();
	}
}

void cpu_idle_wait(void)
{
	unsigned int cpu, this_cpu = get_cpu();
	cpumask_t map;

	set_cpus_allowed(current, cpumask_of_cpu(this_cpu));
	put_cpu();

	cpus_clear(map);
	for_each_online_cpu(cpu) {
		per_cpu(cpu_idle_state, cpu) = 1;
		cpu_set(cpu, map);
	}

	__get_cpu_var(cpu_idle_state) = 0;

	wmb();
	do {
		ssleep(1);
		for_each_online_cpu(cpu) {
			if (cpu_isset(cpu, map) && !per_cpu(cpu_idle_state, cpu))
				cpu_clear(cpu, map);
		}
		cpus_and(map, map, cpu_online_map);
	} while (!cpus_empty(map));
}
EXPORT_SYMBOL_GPL(cpu_idle_wait);
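
/*
 * Typical caller (illustrative, not part of this file): code that changes
 * pm_idle at run time, such as an ACPI idle driver, installs the new
 * handler and then waits until no CPU can still be running the old one:
 *
 *	pm_idle = my_new_idle;		// hypothetical replacement routine
 *	cpu_idle_wait();
 */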

#ifdef CONFIG_HOTPLUG_CPU
DECLARE_PER_CPU(int, cpu_state);

#include <asm/nmi.h>
/* We don't actually take CPU down, just spin without interrupts. */
static inline void play_dead(void)
{
	idle_task_exit();
	wbinvd();
	mb();
	/* Ack it */
	__get_cpu_var(cpu_state) = CPU_DEAD;

	while (1)
		safe_halt();
}
#else
static inline void play_dead(void)
{
	BUG();
}
#endif /* CONFIG_HOTPLUG_CPU */

/*
 * The idle thread. There's no useful work to be
 * done, so just try to conserve power and have a
 * low exit latency (ie sit in a loop waiting for
 * somebody to say that they'd like to reschedule)
 */
void cpu_idle (void)
{
	/* endless idle loop with no priority at all */
	while (1) {
		while (!need_resched()) {
			void (*idle)(void);

			if (__get_cpu_var(cpu_idle_state))
				__get_cpu_var(cpu_idle_state) = 0;

			rmb();
			idle = pm_idle;
			if (!idle)
				idle = default_idle;
			if (cpu_is_offline(smp_processor_id()))
				play_dead();
			idle();
		}

		schedule();
	}
}

/*
 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
 * which can obviate IPI to trigger checking of need_resched.
 * We execute MONITOR against need_resched and enter optimized wait state
 * through MWAIT. Whenever someone changes need_resched, we would be woken
 * up from MWAIT (without an IPI).
 */
static void mwait_idle(void)
{
	local_irq_enable();

	if (!need_resched()) {
		set_thread_flag(TIF_POLLING_NRFLAG);
		do {
			__monitor((void *)&current_thread_info()->flags, 0, 0);
			if (need_resched())
				break;
			__mwait(0, 0);
		} while (!need_resched());
		clear_thread_flag(TIF_POLLING_NRFLAG);
	}
}

void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
{
	static int printed;
	if (cpu_has(c, X86_FEATURE_MWAIT)) {
		/*
		 * Skip if setup has overridden idle.
		 * One CPU supports mwait => all CPUs support mwait.
		 */
		if (!pm_idle) {
			if (!printed) {
				printk("using mwait in idle threads.\n");
				printed = 1;
			}
			pm_idle = mwait_idle;
		}
	}
}

static int __init idle_setup (char *str)
{
	if (!strncmp(str, "poll", 4)) {
		printk("using polling idle threads.\n");
		pm_idle = poll_idle;
	}

	boot_option_idle_override = 1;
	return 1;
}

__setup("idle=", idle_setup);
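
/*
 * Example (illustrative): booting with the kernel command line option
 *
 *	idle=poll
 *
 * installs poll_idle() above before select_idle_routine() runs, so the
 * mwait routine is never substituted; boot_option_idle_override also
 * records that the user forced an idle policy.
 */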

/* Prints also some state that isn't saved in the pt_regs */
void __show_regs(struct pt_regs * regs)
{
	unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L, fs, gs, shadowgs;
	unsigned int fsindex,gsindex;
	unsigned int ds,cs,es;

	printk("\n");
	print_modules();
	printk("Pid: %d, comm: %.20s %s %s\n",
		current->pid, current->comm, print_tainted(), system_utsname.release);
	printk("RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->rip);
	printk_address(regs->rip);
	printk("\nRSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, regs->rsp, regs->eflags);
	printk("RAX: %016lx RBX: %016lx RCX: %016lx\n",
	       regs->rax, regs->rbx, regs->rcx);
	printk("RDX: %016lx RSI: %016lx RDI: %016lx\n",
	       regs->rdx, regs->rsi, regs->rdi);
	printk("RBP: %016lx R08: %016lx R09: %016lx\n",
	       regs->rbp, regs->r8, regs->r9);
	printk("R10: %016lx R11: %016lx R12: %016lx\n",
	       regs->r10, regs->r11, regs->r12);
	printk("R13: %016lx R14: %016lx R15: %016lx\n",
	       regs->r13, regs->r14, regs->r15);

	asm("movl %%ds,%0" : "=r" (ds));
	asm("movl %%cs,%0" : "=r" (cs));
	asm("movl %%es,%0" : "=r" (es));
	asm("movl %%fs,%0" : "=r" (fsindex));
	asm("movl %%gs,%0" : "=r" (gsindex));

	rdmsrl(MSR_FS_BASE, fs);
	rdmsrl(MSR_GS_BASE, gs);
	rdmsrl(MSR_KERNEL_GS_BASE, shadowgs);

	asm("movq %%cr0, %0": "=r" (cr0));
	asm("movq %%cr2, %0": "=r" (cr2));
	asm("movq %%cr3, %0": "=r" (cr3));
	asm("movq %%cr4, %0": "=r" (cr4));

	printk("FS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
	       fs,fsindex,gs,gsindex,shadowgs);
	printk("CS: %04x DS: %04x ES: %04x CR0: %016lx\n", cs, ds, es, cr0);
	printk("CR2: %016lx CR3: %016lx CR4: %016lx\n", cr2, cr3, cr4);
}

void show_regs(struct pt_regs *regs)
{
	__show_regs(regs);
	show_trace(&regs->rsp);
}

/*
 * Free current thread data structures etc..
 */
void exit_thread(void)
{
	struct task_struct *me = current;
	struct thread_struct *t = &me->thread;

	/*
	 * Remove function-return probe instances associated with this task
	 * and put them back on the free list. Do not insert an exit probe for
	 * this function, it will be disabled by kprobe_flush_task if you do.
	 */
	kprobe_flush_task(me);

	if (me->thread.io_bitmap_ptr) {
		struct tss_struct *tss = &per_cpu(init_tss, get_cpu());

		kfree(t->io_bitmap_ptr);
		t->io_bitmap_ptr = NULL;
		/*
		 * Careful, clear this in the TSS too:
		 */
		memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
		t->io_bitmap_max = 0;
		put_cpu();
	}
}

void flush_thread(void)
{
	struct task_struct *tsk = current;
	struct thread_info *t = current_thread_info();

	/*
	 * Remove function-return probe instances associated with this task
	 * and put them back on the free list. Do not insert an exit probe for
	 * this function, it will be disabled by kprobe_flush_task if you do.
	 */
	kprobe_flush_task(tsk);

	if (t->flags & _TIF_ABI_PENDING)
		t->flags ^= (_TIF_ABI_PENDING | _TIF_IA32);

	tsk->thread.debugreg0 = 0;
	tsk->thread.debugreg1 = 0;
	tsk->thread.debugreg2 = 0;
	tsk->thread.debugreg3 = 0;
	tsk->thread.debugreg6 = 0;
	tsk->thread.debugreg7 = 0;
	memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array));
	/*
	 * Forget coprocessor state..
	 */
	clear_fpu(tsk);
	clear_used_math();
}

void release_thread(struct task_struct *dead_task)
{
	if (dead_task->mm) {
		if (dead_task->mm->context.size) {
			printk("WARNING: dead process %8s still has LDT? <%p/%d>\n",
					dead_task->comm,
					dead_task->mm->context.ldt,
					dead_task->mm->context.size);
			BUG();
		}
	}
}

static inline void set_32bit_tls(struct task_struct *t, int tls, u32 addr)
{
	struct user_desc ud = {
		.base_addr = addr,
		.limit = 0xfffff,
		.seg_32bit = 1,
		.limit_in_pages = 1,
		.useable = 1,
	};
	struct n_desc_struct *desc = (void *)t->thread.tls_array;
	desc += tls;
	desc->a = LDT_entry_a(&ud);
	desc->b = LDT_entry_b(&ud);
}

static inline u32 read_32bit_tls(struct task_struct *t, int tls)
{
	struct desc_struct *desc = (void *)t->thread.tls_array;
	desc += tls;
	return desc->base0 |
		(((u32)desc->base1) << 16) |
		(((u32)desc->base2) << 24);
}
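
/*
 * Worked example (illustrative): for a 32bit TLS base of 0x12345678 the
 * descriptor set up above carries the address split as
 *
 *	base0 = 0x5678, base1 = 0x34, base2 = 0x12
 *
 * and read_32bit_tls() reassembles it as
 *
 *	0x5678 | (0x34 << 16) | (0x12 << 24) == 0x12345678.
 */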

/*
 * This gets called before we allocate a new thread and copy
 * the current task into it.
 */
void prepare_to_copy(struct task_struct *tsk)
{
	unlazy_fpu(tsk);
}

int copy_thread(int nr, unsigned long clone_flags, unsigned long rsp,
		unsigned long unused,
		struct task_struct * p, struct pt_regs * regs)
{
	int err;
	struct pt_regs * childregs;
	struct task_struct *me = current;

	childregs = ((struct pt_regs *) (THREAD_SIZE + (unsigned long) p->thread_info)) - 1;

	*childregs = *regs;

	childregs->rax = 0;
	childregs->rsp = rsp;
	if (rsp == ~0UL) {
		childregs->rsp = (unsigned long)childregs;
	}

	p->thread.rsp = (unsigned long) childregs;
	p->thread.rsp0 = (unsigned long) (childregs+1);
	p->thread.userrsp = me->thread.userrsp;

	set_ti_thread_flag(p->thread_info, TIF_FORK);

	p->thread.fs = me->thread.fs;
	p->thread.gs = me->thread.gs;

	asm("mov %%gs,%0" : "=m" (p->thread.gsindex));
	asm("mov %%fs,%0" : "=m" (p->thread.fsindex));
	asm("mov %%es,%0" : "=m" (p->thread.es));
	asm("mov %%ds,%0" : "=m" (p->thread.ds));

	if (unlikely(me->thread.io_bitmap_ptr != NULL)) {
		p->thread.io_bitmap_ptr = kmalloc(IO_BITMAP_BYTES, GFP_KERNEL);
		if (!p->thread.io_bitmap_ptr) {
			p->thread.io_bitmap_max = 0;
			return -ENOMEM;
		}
		memcpy(p->thread.io_bitmap_ptr, me->thread.io_bitmap_ptr, IO_BITMAP_BYTES);
	}

	/*
	 * Set a new TLS for the child thread?
	 */
	if (clone_flags & CLONE_SETTLS) {
#ifdef CONFIG_IA32_EMULATION
		if (test_thread_flag(TIF_IA32))
			err = ia32_child_tls(p, childregs);
		else
#endif
			err = do_arch_prctl(p, ARCH_SET_FS, childregs->r8);
		if (err)
			goto out;
	}
	err = 0;
out:
	if (err && p->thread.io_bitmap_ptr) {
		kfree(p->thread.io_bitmap_ptr);
		p->thread.io_bitmap_max = 0;
	}
	return err;
}
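
/*
 * Note on the register setup above (descriptive, not in the original
 * source): childregs->rax = 0 is what makes fork()/clone() return 0 in
 * the child; the rsp == ~0UL convention means "no user stack supplied",
 * in which case the child's stack pointer is simply aimed at the newly
 * built kernel frame.
 */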

/*
 * This special macro can be used to load a debugging register
 */
#define loaddebug(thread,r) set_debug(thread->debugreg ## r, r)

/*
 * switch_to(x,y) should switch tasks from x to y.
 *
 * This could still be optimized:
 * - fold all the options into a flag word and test it with a single test.
 * - could test fs/gs bitsliced
 */
struct task_struct *__switch_to(struct task_struct *prev_p, struct task_struct *next_p)
{
	struct thread_struct *prev = &prev_p->thread,
				 *next = &next_p->thread;
	int cpu = smp_processor_id();
	struct tss_struct *tss = &per_cpu(init_tss, cpu);

	unlazy_fpu(prev_p);

	/*
	 * Reload esp0, LDT and the page table pointer:
	 */
	tss->rsp0 = next->rsp0;

	/*
	 * Switch DS and ES.
	 * This won't pick up thread selector changes, but I guess that is ok.
	 */
	asm volatile("mov %%es,%0" : "=m" (prev->es));
	if (unlikely(next->es | prev->es))
		loadsegment(es, next->es);

	asm volatile ("mov %%ds,%0" : "=m" (prev->ds));
	if (unlikely(next->ds | prev->ds))
		loadsegment(ds, next->ds);

	load_TLS(next, cpu);

	/*
	 * Switch FS and GS.
	 */
	{
		unsigned fsindex;
		asm volatile("movl %%fs,%0" : "=r" (fsindex));
		/* segment register != 0 always requires a reload.
		   also reload when it has changed.
		   when prev process used 64bit base always reload
		   to avoid an information leak. */
		if (unlikely(fsindex | next->fsindex | prev->fs)) {
			loadsegment(fs, next->fsindex);
			/* check if the user used a selector != 0
			 * if yes clear 64bit base, since overloaded base
			 * is always mapped to the Null selector
			 */
			if (fsindex)
				prev->fs = 0;
		}
		/* when next process has a 64bit base use it */
		if (next->fs)
			wrmsrl(MSR_FS_BASE, next->fs);
		prev->fsindex = fsindex;
	}
	{
		unsigned gsindex;
		asm volatile("movl %%gs,%0" : "=r" (gsindex));
		if (unlikely(gsindex | next->gsindex | prev->gs)) {
			load_gs_index(next->gsindex);
			if (gsindex)
				prev->gs = 0;
		}
		if (next->gs)
			wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
		prev->gsindex = gsindex;
	}

	/*
	 * Switch the PDA context.
	 */
	prev->userrsp = read_pda(oldrsp);
	write_pda(oldrsp, next->userrsp);
	write_pda(pcurrent, next_p);
	write_pda(kernelstack, (unsigned long)next_p->thread_info + THREAD_SIZE - PDA_STACKOFFSET);

	/*
	 * Now maybe reload the debug registers
	 */
	if (unlikely(next->debugreg7)) {
		loaddebug(next, 0);
		loaddebug(next, 1);
		loaddebug(next, 2);
		loaddebug(next, 3);
		/* no 4 and 5 */
		loaddebug(next, 6);
		loaddebug(next, 7);
	}


	/*
	 * Handle the IO bitmap
	 */
	if (unlikely(prev->io_bitmap_ptr || next->io_bitmap_ptr)) {
		if (next->io_bitmap_ptr)
			/*
			 * Copy the relevant range of the IO bitmap.
			 * Normally this is 128 bytes or less:
			 */
			memcpy(tss->io_bitmap, next->io_bitmap_ptr,
				max(prev->io_bitmap_max, next->io_bitmap_max));
		else {
			/*
			 * Clear any possible leftover bits:
			 */
			memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
		}
	}

	return prev_p;
}

/*
 * sys_execve() executes a new program.
 */
asmlinkage
long sys_execve(char __user *name, char __user * __user *argv,
		char __user * __user *envp, struct pt_regs regs)
{
	long error;
	char * filename;

	filename = getname(name);
	error = PTR_ERR(filename);
	if (IS_ERR(filename))
		return error;
	error = do_execve(filename, argv, envp, &regs);
	if (error == 0) {
		task_lock(current);
		current->ptrace &= ~PT_DTRACE;
		task_unlock(current);
	}
	putname(filename);
	return error;
}

void set_personality_64bit(void)
{
	/* inherit personality from parent */

	/* Make sure to be in 64bit mode */
	clear_thread_flag(TIF_IA32);

	/* TBD: overwrites user setup. Should have two bits.
	   But 64bit processes have always behaved this way,
	   so it's not too bad. The main problem is just that
	   32bit children are affected again. */
	current->personality &= ~READ_IMPLIES_EXEC;
}

asmlinkage long sys_fork(struct pt_regs *regs)
{
	return do_fork(SIGCHLD, regs->rsp, regs, 0, NULL, NULL);
}

asmlinkage long sys_clone(unsigned long clone_flags, unsigned long newsp, void __user *parent_tid, void __user *child_tid, struct pt_regs *regs)
{
	if (!newsp)
		newsp = regs->rsp;
	return do_fork(clone_flags, newsp, regs, 0, parent_tid, child_tid);
}

/*
 * This is trivial, and on the face of it looks like it
 * could equally well be done in user mode.
 *
 * Not so, for quite unobvious reasons - register pressure.
 * In user mode vfork() cannot have a stack frame, and if
 * done by calling the "clone()" system call directly, you
 * do not have enough call-clobbered registers to hold all
 * the information you need.
 */
asmlinkage long sys_vfork(struct pt_regs *regs)
{
	return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->rsp, regs, 0,
		    NULL, NULL);
}

unsigned long get_wchan(struct task_struct *p)
{
	unsigned long stack;
	u64 fp,rip;
	int count = 0;

	if (!p || p == current || p->state==TASK_RUNNING)
		return 0;
	stack = (unsigned long)p->thread_info;
	if (p->thread.rsp < stack || p->thread.rsp > stack+THREAD_SIZE)
		return 0;
	fp = *(u64 *)(p->thread.rsp);
	do {
		if (fp < (unsigned long)stack || fp > (unsigned long)stack+THREAD_SIZE)
			return 0;
		rip = *(u64 *)(fp+8);
		if (!in_sched_functions(rip))
			return rip;
		fp = *(u64 *)fp;
	} while (count++ < 16);
	return 0;
}
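
/*
 * Stack layout assumed by the walk above (standard frame pointer chain,
 * noted here for clarity):
 *
 *	p->thread.rsp -> saved frame pointer (fp)
 *	fp + 8        -> return address (rip)
 *	*fp           -> caller's frame pointer
 *
 * At most 16 frames are followed, and the first return address outside
 * the scheduler is reported as the wait channel.
 */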

long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
{
	int ret = 0;
	int doit = task == current;
	int cpu;

	switch (code) {
	case ARCH_SET_GS:
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, GS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				load_gs_index(GS_TLS_SEL);
			}
			task->thread.gsindex = GS_TLS_SEL;
			task->thread.gs = 0;
		} else {
			task->thread.gsindex = 0;
			task->thread.gs = addr;
			if (doit) {
				load_gs_index(0);
				ret = checking_wrmsrl(MSR_KERNEL_GS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_SET_FS:
		/* Not strictly needed for fs, but do it for symmetry
		   with gs */
		if (addr >= TASK_SIZE_OF(task))
			return -EPERM;
		cpu = get_cpu();
		/* handle small bases via the GDT because that's faster to
		   switch. */
		if (addr <= 0xffffffff) {
			set_32bit_tls(task, FS_TLS, addr);
			if (doit) {
				load_TLS(&task->thread, cpu);
				asm volatile("movl %0,%%fs" :: "r" (FS_TLS_SEL));
			}
			task->thread.fsindex = FS_TLS_SEL;
			task->thread.fs = 0;
		} else {
			task->thread.fsindex = 0;
			task->thread.fs = addr;
			if (doit) {
				/* set the selector to 0 to not confuse
				   __switch_to */
				asm volatile("movl %0,%%fs" :: "r" (0));
				ret = checking_wrmsrl(MSR_FS_BASE, addr);
			}
		}
		put_cpu();
		break;
	case ARCH_GET_FS: {
		unsigned long base;
		if (task->thread.fsindex == FS_TLS_SEL)
			base = read_32bit_tls(task, FS_TLS);
		else if (doit) {
			rdmsrl(MSR_FS_BASE, base);
		} else
			base = task->thread.fs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}
	case ARCH_GET_GS: {
		unsigned long base;
		if (task->thread.gsindex == GS_TLS_SEL)
			base = read_32bit_tls(task, GS_TLS);
		else if (doit) {
			rdmsrl(MSR_KERNEL_GS_BASE, base);
		} else
			base = task->thread.gs;
		ret = put_user(base, (unsigned long __user *)addr);
		break;
	}

	default:
		ret = -EINVAL;
		break;
	}

	return ret;
}

long sys_arch_prctl(int code, unsigned long addr)
{
	return do_arch_prctl(current, code, addr);
}
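
/*
 * Userspace view (illustrative, not part of this file): the usual way to
 * reach do_arch_prctl() is the arch_prctl(2) syscall, roughly:
 *
 *	#include <asm/prctl.h>
 *	#include <sys/syscall.h>
 *
 *	unsigned long base;
 *	syscall(SYS_arch_prctl, ARCH_SET_GS, 0x1000000UL);	// set base
 *	syscall(SYS_arch_prctl, ARCH_GET_GS, &base);		// read it back
 *
 * Error handling is omitted; both calls return 0 on success.
 */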

/*
 * Capture the user space registers if the task is not running (in user space)
 */
int dump_task_regs(struct task_struct *tsk, elf_gregset_t *regs)
{
	struct pt_regs *pp, ptregs;

	pp = (struct pt_regs *)(tsk->thread.rsp0);
	--pp;

	ptregs = *pp;
	ptregs.cs &= 0xffff;
	ptregs.ss &= 0xffff;

	elf_core_copy_regs(regs, &ptregs);

	return 1;
}

unsigned long arch_align_stack(unsigned long sp)
{
	if (randomize_va_space)
		sp -= get_random_int() % 8192;
	return sp & ~0xf;
}
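
/*
 * Worked example (illustrative): with randomization enabled the stack top
 * is moved down by a random amount in [0, 8191] and then rounded down to
 * a 16 byte boundary, e.g.
 *
 *	sp = 0x800000, offset 100  ->  0x7fff9c  ->  0x7fff90
 */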