Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/signal
diff --git a/kernel/fork.c b/kernel/fork.c
index da4a6a1..3c31e87 100644
@@ -34,6 +34,7 @@
 #include <linux/cgroup.h>
 #include <linux/security.h>
 #include <linux/hugetlb.h>
+#include <linux/seccomp.h>
 #include <linux/swap.h>
 #include <linux/syscalls.h>
 #include <linux/jiffies.h>
@@ -47,6 +48,7 @@
 #include <linux/audit.h>
 #include <linux/memcontrol.h>
 #include <linux/ftrace.h>
+#include <linux/proc_fs.h>
 #include <linux/profile.h>
 #include <linux/rmap.h>
 #include <linux/ksm.h>
@@ -66,6 +68,8 @@
 #include <linux/user-return-notifier.h>
 #include <linux/oom.h>
 #include <linux/khugepaged.h>
+#include <linux/signalfd.h>
+#include <linux/uprobes.h>
 
 #include <asm/pgtable.h>
 #include <asm/pgalloc.h>
@@ -76,6 +80,9 @@
 
 #include <trace/events/sched.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/task.h>
+
 /*
  * Protected counters by write_lock_irq(&tasklist_lock)
  */
@@ -107,24 +114,40 @@ int nr_processes(void)
        return total;
 }
 
-#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
-# define alloc_task_struct_node(node)          \
-               kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node)
-# define free_task_struct(tsk)                 \
-               kmem_cache_free(task_struct_cachep, (tsk))
+void __weak arch_release_task_struct(struct task_struct *tsk)
+{
+}
+
+#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
 static struct kmem_cache *task_struct_cachep;
+
+static inline struct task_struct *alloc_task_struct_node(int node)
+{
+       return kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node);
+}
+
+static inline void free_task_struct(struct task_struct *tsk)
+{
+       kmem_cache_free(task_struct_cachep, tsk);
+}
 #endif
 
-#ifndef __HAVE_ARCH_THREAD_INFO_ALLOCATOR
+void __weak arch_release_thread_info(struct thread_info *ti)
+{
+}
+
+#ifndef CONFIG_ARCH_THREAD_INFO_ALLOCATOR
+
+/*
+ * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a
+ * kmemcache based allocator.
+ */
+# if THREAD_SIZE >= PAGE_SIZE
 static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
                                                  int node)
 {
-#ifdef CONFIG_DEBUG_STACK_USAGE
-       gfp_t mask = GFP_KERNEL | __GFP_ZERO;
-#else
-       gfp_t mask = GFP_KERNEL;
-#endif
-       struct page *page = alloc_pages_node(node, mask, THREAD_SIZE_ORDER);
+       struct page *page = alloc_pages_node(node, THREADINFO_GFP,
+                                            THREAD_SIZE_ORDER);
 
        return page ? page_address(page) : NULL;
 }
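
The two __weak stubs above replace the old #define/#ifndef dance: an architecture that needs teardown work simply supplies a strong definition and the linker prefers it over the empty weak one. A minimal sketch of such an override (the hook name and signature are real; the freed field is a hypothetical arch-private pointer, not taken from any in-tree arch):

/* hypothetical arch code, e.g. arch/foo/kernel/process.c */
void arch_release_task_struct(struct task_struct *tsk)
{
        /* free lazily allocated per-task state; "fpu_state" is an
         * assumed example field, not a real one */
        kfree(tsk->thread.fpu_state);
        tsk->thread.fpu_state = NULL;
}
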
@@ -133,6 +156,27 @@ static inline void free_thread_info(struct thread_info *ti)
 {
        free_pages((unsigned long)ti, THREAD_SIZE_ORDER);
 }
+# else
+static struct kmem_cache *thread_info_cache;
+
+static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
+                                                 int node)
+{
+       return kmem_cache_alloc_node(thread_info_cache, THREADINFO_GFP, node);
+}
+
+static void free_thread_info(struct thread_info *ti)
+{
+       kmem_cache_free(thread_info_cache, ti);
+}
+
+void thread_info_cache_init(void)
+{
+       thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE,
+                                             THREAD_SIZE, 0, NULL);
+       BUG_ON(thread_info_cache == NULL);
+}
+# endif
 #endif
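
Passing THREAD_SIZE for both the size and the alignment of the "thread_info" cache is deliberate: on configurations where the stack lives in a slab, current_thread_info() typically recovers the thread_info by masking the stack pointer, which only works if every stack is THREAD_SIZE-aligned. A sketch of the usual arch idiom (illustrative only; per-arch details vary):

static inline struct thread_info *current_thread_info(void)
{
        register unsigned long sp asm ("sp");   /* arch-specific */
        return (struct thread_info *)(sp & ~(THREAD_SIZE - 1));
}
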
 
 /* SLAB cache for signal_struct structures (tsk->signal) */
@@ -163,9 +207,12 @@ static void account_kernel_stack(struct thread_info *ti, int account)
 void free_task(struct task_struct *tsk)
 {
        account_kernel_stack(tsk->stack, -1);
+       arch_release_thread_info(tsk->stack);
        free_thread_info(tsk->stack);
        rt_mutex_debug_task_free(tsk);
        ftrace_graph_exit_task(tsk);
+       put_seccomp_filter(tsk);
+       arch_release_task_struct(tsk);
        free_task_struct(tsk);
 }
 EXPORT_SYMBOL(free_task);
@@ -189,6 +236,7 @@ void __put_task_struct(struct task_struct *tsk)
        WARN_ON(atomic_read(&tsk->usage));
        WARN_ON(tsk == current);
 
+       security_task_free(tsk);
        exit_creds(tsk);
        delayacct_tsk_free(tsk);
        put_signal_struct(tsk->signal);
@@ -198,17 +246,11 @@ void __put_task_struct(struct task_struct *tsk)
 }
 EXPORT_SYMBOL_GPL(__put_task_struct);
 
-/*
- * macro override instead of weak attribute alias, to workaround
- * gcc 4.1.0 and 4.1.1 bugs with weak attribute and empty functions.
- */
-#ifndef arch_task_cache_init
-#define arch_task_cache_init()
-#endif
+void __init __weak arch_task_cache_init(void) { }
 
 void __init fork_init(unsigned long mempages)
 {
-#ifndef __HAVE_ARCH_TASK_STRUCT_ALLOCATOR
+#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
 #ifndef ARCH_MIN_TASKALIGN
 #define ARCH_MIN_TASKALIGN     L1_CACHE_BYTES
 #endif
@@ -255,21 +297,17 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
        int node = tsk_fork_get_node(orig);
        int err;
 
-       prepare_to_copy(orig);
-
        tsk = alloc_task_struct_node(node);
        if (!tsk)
                return NULL;
 
        ti = alloc_thread_info_node(tsk, node);
-       if (!ti) {
-               free_task_struct(tsk);
-               return NULL;
-       }
+       if (!ti)
+               goto free_tsk;
 
        err = arch_dup_task_struct(tsk, orig);
        if (err)
-               goto out;
+               goto free_ti;
 
        tsk->stack = ti;
 
@@ -292,13 +330,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
        tsk->btrace_seq = 0;
 #endif
        tsk->splice_pipe = NULL;
+       tsk->task_frag.page = NULL;
 
        account_kernel_stack(ti, 1);
 
        return tsk;
 
-out:
+free_ti:
        free_thread_info(ti);
+free_tsk:
        free_task_struct(tsk);
        return NULL;
 }
@@ -312,8 +352,10 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
        unsigned long charge;
        struct mempolicy *pol;
 
+       uprobe_start_dup_mmap();
        down_write(&oldmm->mmap_sem);
        flush_cache_dup_mm(oldmm);
+       uprobe_dup_mmap(oldmm, mm);
        /*
         * Not linked in yet - no deadlock potential:
         */
@@ -342,16 +384,15 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
                struct file *file;
 
                if (mpnt->vm_flags & VM_DONTCOPY) {
-                       long pages = vma_pages(mpnt);
-                       mm->total_vm -= pages;
                        vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file,
-                                                               -pages);
+                                                       -vma_pages(mpnt));
                        continue;
                }
                charge = 0;
                if (mpnt->vm_flags & VM_ACCOUNT) {
-                       unsigned int len = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
-                       if (security_vm_enough_memory(len))
+                       unsigned long len = vma_pages(mpnt);
+
+                       if (security_vm_enough_memory_mm(oldmm, len)) /* sic */
                                goto fail_nomem;
                        charge = len;
                }
@@ -383,7 +424,12 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
                                mapping->i_mmap_writable++;
                        flush_dcache_mmap_lock(mapping);
                        /* insert tmp into the share list, just after mpnt */
-                       vma_prio_tree_add(tmp, mpnt);
+                       if (unlikely(tmp->vm_flags & VM_NONLINEAR))
+                               vma_nonlinear_insert(tmp,
+                                               &mapping->i_mmap_nonlinear);
+                       else
+                               vma_interval_tree_insert_after(tmp, mpnt,
+                                                       &mapping->i_mmap);
                        flush_dcache_mmap_unlock(mapping);
                        mutex_unlock(&mapping->i_mmap_mutex);
                }
@@ -424,6 +470,7 @@ out:
        up_write(&mm->mmap_sem);
        flush_tlb_mm(oldmm);
        up_write(&oldmm->mmap_sem);
+       uprobe_end_dup_mmap();
        return retval;
 fail_nomem_anon_vma_fork:
        mpol_put(pol);
@@ -507,6 +554,23 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
        return NULL;
 }
 
+static void check_mm(struct mm_struct *mm)
+{
+       int i;
+
+       for (i = 0; i < NR_MM_COUNTERS; i++) {
+               long x = atomic_long_read(&mm->rss_stat.count[i]);
+
+               if (unlikely(x))
+                       printk(KERN_ALERT "BUG: Bad rss-counter state "
+                                         "mm:%p idx:%d val:%ld\n", mm, i, x);
+       }
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+       VM_BUG_ON(mm->pmd_huge_pte);
+#endif
+}
+
 /*
  * Allocate and initialize an mm_struct.
  */
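
When check_mm() fires, the "idx" in the BUG message maps onto the per-mm RSS counter layout, which in kernels of this vintage is the following (from include/linux/mm_types.h; worth verifying against the exact tree):

enum {
        MM_FILEPAGES,   /* idx 0: resident file-backed pages */
        MM_ANONPAGES,   /* idx 1: resident anonymous pages */
        MM_SWAPENTS,    /* idx 2: anonymous pages swapped out */
        NR_MM_COUNTERS
};
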
@@ -534,9 +598,7 @@ void __mmdrop(struct mm_struct *mm)
        mm_free_pgd(mm);
        destroy_context(mm);
        mmu_notifier_mm_destroy(mm);
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-       VM_BUG_ON(mm->pmd_huge_pte);
-#endif
+       check_mm(mm);
        free_mm(mm);
 }
 EXPORT_SYMBOL_GPL(__mmdrop);
@@ -549,6 +611,7 @@ void mmput(struct mm_struct *mm)
        might_sleep();
 
        if (atomic_dec_and_test(&mm->mm_users)) {
+               uprobe_clear_state(mm);
                exit_aio(mm);
                ksm_exit(mm);
                khugepaged_exit(mm); /* must run before exit_mmap */
@@ -559,7 +622,6 @@ void mmput(struct mm_struct *mm)
                        list_del(&mm->mmlist);
                        spin_unlock(&mmlist_lock);
                }
-               put_swap_token(mm);
                if (mm->binfmt)
                        module_put(mm->binfmt->module);
                mmdrop(mm);
@@ -567,26 +629,6 @@ void mmput(struct mm_struct *mm)
 }
 EXPORT_SYMBOL_GPL(mmput);
 
-/*
- * We added or removed a vma mapping the executable. The vmas are only mapped
- * during exec and are not mapped with the mmap system call.
- * Callers must hold down_write() on the mm's mmap_sem for these
- */
-void added_exe_file_vma(struct mm_struct *mm)
-{
-       mm->num_exe_file_vmas++;
-}
-
-void removed_exe_file_vma(struct mm_struct *mm)
-{
-       mm->num_exe_file_vmas--;
-       if ((mm->num_exe_file_vmas == 0) && mm->exe_file) {
-               fput(mm->exe_file);
-               mm->exe_file = NULL;
-       }
-
-}
-
 void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
 {
        if (new_exe_file)
@@ -594,15 +636,13 @@ void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
        if (mm->exe_file)
                fput(mm->exe_file);
        mm->exe_file = new_exe_file;
-       mm->num_exe_file_vmas = 0;
 }
 
 struct file *get_mm_exe_file(struct mm_struct *mm)
 {
        struct file *exe_file;
 
-       /* We need mmap_sem to protect against races with removal of
-        * VM_EXECUTABLE vmas */
+       /* We need mmap_sem to protect against races with removal of exe_file */
        down_read(&mm->mmap_sem);
        exe_file = mm->exe_file;
        if (exe_file)
@@ -644,6 +684,58 @@ struct mm_struct *get_task_mm(struct task_struct *task)
 }
 EXPORT_SYMBOL_GPL(get_task_mm);
 
+struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
+{
+       struct mm_struct *mm;
+       int err;
+
+       err = mutex_lock_killable(&task->signal->cred_guard_mutex);
+       if (err)
+               return ERR_PTR(err);
+
+       mm = get_task_mm(task);
+       if (mm && mm != current->mm &&
+                       !ptrace_may_access(task, mode)) {
+               mmput(mm);
+               mm = ERR_PTR(-EACCES);
+       }
+       mutex_unlock(&task->signal->cred_guard_mutex);
+
+       return mm;
+}
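
mm_access() gives /proc- and ptrace-style readers one helper that takes a reference on another task's mm only when the caller would be allowed to ptrace it, serialized against exec by cred_guard_mutex. A sketch of a hypothetical caller (PTRACE_MODE_READ is a real mode constant; error handling abbreviated):

struct mm_struct *mm = mm_access(task, PTRACE_MODE_READ);

if (IS_ERR_OR_NULL(mm))
        return mm ? PTR_ERR(mm) : -ESRCH;  /* denied, or task has no mm */
/* ... read from mm while holding our own mm_users reference ... */
mmput(mm);
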
+
+static void complete_vfork_done(struct task_struct *tsk)
+{
+       struct completion *vfork;
+
+       task_lock(tsk);
+       vfork = tsk->vfork_done;
+       if (likely(vfork)) {
+               tsk->vfork_done = NULL;
+               complete(vfork);
+       }
+       task_unlock(tsk);
+}
+
+static int wait_for_vfork_done(struct task_struct *child,
+                               struct completion *vfork)
+{
+       int killed;
+
+       freezer_do_not_count();
+       killed = wait_for_completion_killable(vfork);
+       freezer_count();
+
+       if (killed) {
+               task_lock(child);
+               child->vfork_done = NULL;
+               task_unlock(child);
+       }
+
+       put_task_struct(child);
+       return killed;
+}
+
 /* Please note the differences between mmput and mm_release.
  * mmput is called whenever we stop holding onto a mm_struct,
  * error success whatever.
@@ -659,8 +751,6 @@ EXPORT_SYMBOL_GPL(get_task_mm);
  */
 void mm_release(struct task_struct *tsk, struct mm_struct *mm)
 {
-       struct completion *vfork_done = tsk->vfork_done;
-
        /* Get rid of any futexes when releasing the mm */
 #ifdef CONFIG_FUTEX
        if (unlikely(tsk->robust_list)) {
@@ -677,20 +767,17 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
                exit_pi_state_list(tsk);
 #endif
 
+       uprobe_free_utask(tsk);
+
        /* Get rid of any cached register state */
        deactivate_mm(tsk, mm);
 
-       /* notify parent sleeping on vfork() */
-       if (vfork_done) {
-               tsk->vfork_done = NULL;
-               complete(vfork_done);
-       }
-
        /*
         * If we're exiting normally, clear a user-space tid field if
         * requested.  We leave this alone when dying by signal, to leave
         * the value intact in a core dump, and to save the unnecessary
-        * trouble otherwise.  Userland only wants this done for a sys_exit.
+        * trouble, say, a killed vfork parent shouldn't touch this mm.
+        * Userland only wants this done for a sys_exit.
         */
        if (tsk->clear_child_tid) {
                if (!(tsk->flags & PF_SIGNALED) &&
@@ -705,6 +792,13 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm)
                }
                tsk->clear_child_tid = NULL;
        }
+
+       /*
+        * All done, finally we can wake up parent and return this mm to him.
+        * Also kthread_stop() uses this completion for synchronization.
+        */
+       if (tsk->vfork_done)
+               complete_vfork_done(tsk);
 }
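
Putting the pieces together, the vfork notification now works as the following sketch summarizes (this restates the code above rather than adding behaviour):

/*
 * vfork handshake, as restructured in this file:
 *
 *   parent, do_fork():                  child:
 *     p->vfork_done = &vfork;
 *     get_task_struct(p);
 *     wake_up_new_task(p);
 *                                         ... exec or exit ...
 *     wait_for_vfork_done(p, &vfork)      mm_release()
 *       [sleeps, killable]                  complete_vfork_done()  -> wakes parent
 *
 * If the parent is killed while waiting, wait_for_vfork_done() clears
 * child->vfork_done under task_lock(), racing safely with
 * complete_vfork_done(); the get_task_struct() reference keeps the
 * child's task_struct valid until put_task_struct() in the waiter.
 */
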
 
 /*
@@ -726,14 +820,9 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
        memcpy(mm, oldmm, sizeof(*mm));
        mm_init_cpumask(mm);
 
-       /* Initializing for Swap token stuff */
-       mm->token_priority = 0;
-       mm->last_interval = 0;
-
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
        mm->pmd_huge_pte = NULL;
 #endif
-
        if (!mm_init(mm, tsk))
                goto fail_nomem;
 
@@ -807,10 +896,6 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
                goto fail_nomem;
 
 good_mm:
-       /* Initializing for Swap token stuff */
-       mm->token_priority = 0;
-       mm->last_interval = 0;
-
        tsk->mm = mm;
        tsk->active_mm = mm;
        return 0;
@@ -870,6 +955,7 @@ static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
 {
 #ifdef CONFIG_BLOCK
        struct io_context *ioc = current->io_context;
+       struct io_context *new_ioc;
 
        if (!ioc)
                return 0;
@@ -877,15 +963,15 @@ static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
         * Share io context with parent, if CLONE_IO is set
         */
        if (clone_flags & CLONE_IO) {
-               tsk->io_context = ioc_task_link(ioc);
-               if (unlikely(!tsk->io_context))
-                       return -ENOMEM;
+               ioc_task_link(ioc);
+               tsk->io_context = ioc;
        } else if (ioprio_valid(ioc->ioprio)) {
-               tsk->io_context = alloc_io_context(GFP_KERNEL, -1);
-               if (unlikely(!tsk->io_context))
+               new_ioc = get_task_io_context(tsk, GFP_KERNEL, NUMA_NO_NODE);
+               if (unlikely(!new_ioc))
                        return -ENOMEM;
 
-               tsk->io_context->ioprio = ioc->ioprio;
+               new_ioc->ioprio = ioc->ioprio;
+               put_io_context(new_ioc);
        }
 #endif
        return 0;
@@ -910,8 +996,10 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
 
 void __cleanup_sighand(struct sighand_struct *sighand)
 {
-       if (atomic_dec_and_test(&sighand->count))
+       if (atomic_dec_and_test(&sighand->count)) {
+               signalfd_cleanup(sighand);
                kmem_cache_free(sighand_cachep, sighand);
+       }
 }
 
 
@@ -972,13 +1060,15 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
        sched_autogroup_fork(sig);
 
 #ifdef CONFIG_CGROUPS
-       init_rwsem(&sig->threadgroup_fork_lock);
+       init_rwsem(&sig->group_rwsem);
 #endif
 
-       sig->oom_adj = current->signal->oom_adj;
        sig->oom_score_adj = current->signal->oom_score_adj;
        sig->oom_score_adj_min = current->signal->oom_score_adj_min;
 
+       sig->has_child_subreaper = current->signal->has_child_subreaper ||
+                                  current->signal->is_child_subreaper;
+
        mutex_init(&sig->cred_guard_mutex);
 
        return 0;
@@ -990,9 +1080,7 @@ static void copy_flags(unsigned long clone_flags, struct task_struct *p)
 
        new_flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);
        new_flags |= PF_FORKNOEXEC;
-       new_flags |= PF_STARTING;
        p->flags = new_flags;
-       clear_freeze_flag(p);
 }
 
 SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
@@ -1023,8 +1111,8 @@ void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
  */
 static void posix_cpu_timers_init(struct task_struct *tsk)
 {
-       tsk->cputime_expires.prof_exp = cputime_zero;
-       tsk->cputime_expires.virt_exp = cputime_zero;
+       tsk->cputime_expires.prof_exp = 0;
+       tsk->cputime_expires.virt_exp = 0;
        tsk->cputime_expires.sched_exp = 0;
        INIT_LIST_HEAD(&tsk->cpu_timers[0]);
        INIT_LIST_HEAD(&tsk->cpu_timers[1]);
@@ -1041,7 +1129,6 @@ static void posix_cpu_timers_init(struct task_struct *tsk)
  */
 static struct task_struct *copy_process(unsigned long clone_flags,
                                        unsigned long stack_start,
-                                       struct pt_regs *regs,
                                        unsigned long stack_size,
                                        int __user *child_tidptr,
                                        struct pid *pid,
@@ -1049,7 +1136,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 {
        int retval;
        struct task_struct *p;
-       int cgroup_callbacks_done = 0;
 
        if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
                return ERR_PTR(-EINVAL);
@@ -1089,6 +1175,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                goto fork_out;
 
        ftrace_graph_init_task(p);
+       get_seccomp_filter(p);
 
        rt_mutex_init_task(p);
 
@@ -1132,14 +1219,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
        init_sigpending(&p->pending);
 
-       p->utime = cputime_zero;
-       p->stime = cputime_zero;
-       p->gtime = cputime_zero;
-       p->utimescaled = cputime_zero;
-       p->stimescaled = cputime_zero;
+       p->utime = p->stime = p->gtime = 0;
+       p->utimescaled = p->stimescaled = 0;
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
-       p->prev_utime = cputime_zero;
-       p->prev_stime = cputime_zero;
+       p->prev_cputime.utime = p->prev_cputime.stime = 0;
 #endif
 #if defined(SPLIT_RSS_COUNTING)
        memset(&p->rss_stat, 0, sizeof(p->rss_stat));
@@ -1158,7 +1241,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        p->io_context = NULL;
        p->audit_context = NULL;
        if (clone_flags & CLONE_THREAD)
-               threadgroup_fork_read_lock(current);
+               threadgroup_change_begin(current);
        cgroup_fork(p);
 #ifdef CONFIG_NUMA
        p->mempolicy = mpol_dup(p->mempolicy);
@@ -1172,14 +1255,11 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifdef CONFIG_CPUSETS
        p->cpuset_mem_spread_rotor = NUMA_NO_NODE;
        p->cpuset_slab_spread_rotor = NUMA_NO_NODE;
+       seqcount_init(&p->mems_allowed_seq);
 #endif
 #ifdef CONFIG_TRACE_IRQFLAGS
        p->irq_events = 0;
-#ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
-       p->hardirqs_enabled = 1;
-#else
        p->hardirqs_enabled = 0;
-#endif
        p->hardirq_enable_ip = 0;
        p->hardirq_enable_event = 0;
        p->hardirq_disable_ip = _THIS_IP_;
@@ -1201,7 +1281,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifdef CONFIG_DEBUG_MUTEXES
        p->blocked_on = NULL; /* not blocked yet */
 #endif
-#ifdef CONFIG_CGROUP_MEM_RES_CTLR
+#ifdef CONFIG_MEMCG
        p->memcg_batch.do_batch = 0;
        p->memcg_batch.memcg = NULL;
 #endif
@@ -1240,7 +1320,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        retval = copy_io(clone_flags, p);
        if (retval)
                goto bad_fork_cleanup_namespaces;
-       retval = copy_thread(clone_flags, stack_start, stack_size, p, regs);
+       retval = copy_thread(clone_flags, stack_start, stack_size, p);
        if (retval)
                goto bad_fork_cleanup_io;
 
@@ -1272,6 +1352,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        INIT_LIST_HEAD(&p->pi_state_list);
        p->pi_state_cache = NULL;
 #endif
+       uprobe_copy_process(p);
        /*
         * sigaltstack should be cleared when sharing the same VM
         */
@@ -1290,12 +1371,19 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        clear_all_latency_tracing(p);
 
        /* ok, now we should be set up.. */
-       p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);
+       if (clone_flags & CLONE_THREAD)
+               p->exit_signal = -1;
+       else if (clone_flags & CLONE_PARENT)
+               p->exit_signal = current->group_leader->exit_signal;
+       else
+               p->exit_signal = (clone_flags & CSIGNAL);
+
        p->pdeath_signal = 0;
        p->exit_state = 0;
 
        p->nr_dirtied = 0;
        p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10);
+       p->dirty_paused_when = 0;
 
        /*
         * Ok, make it visible to the rest of the system.
@@ -1303,12 +1391,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
         */
        p->group_leader = p;
        INIT_LIST_HEAD(&p->thread_group);
-
-       /* Now that the task is set up, run cgroup callbacks if
-        * necessary. We need to run them before the task is visible
-        * on the tasklist. */
-       cgroup_fork_callbacks(p);
-       cgroup_callbacks_done = 1;
+       p->task_works = NULL;
 
        /* Need tasklist lock for parent etc handling! */
        write_lock_irq(&tasklist_lock);
@@ -1373,8 +1456,11 @@ static struct task_struct *copy_process(unsigned long clone_flags,
        proc_fork_connector(p);
        cgroup_post_fork(p);
        if (clone_flags & CLONE_THREAD)
-               threadgroup_fork_read_unlock(current);
+               threadgroup_change_end(current);
        perf_event_fork(p);
+
+       trace_task_newtask(p, clone_flags);
+
        return p;
 
 bad_fork_free_pid:
@@ -1384,6 +1470,8 @@ bad_fork_cleanup_io:
        if (p->io_context)
                exit_io_context(p);
 bad_fork_cleanup_namespaces:
+       if (unlikely(clone_flags & CLONE_NEWPID))
+               pid_ns_release_proc(p->nsproxy->pid_ns);
        exit_task_namespaces(p);
 bad_fork_cleanup_mm:
        if (p->mm)
@@ -1408,8 +1496,8 @@ bad_fork_cleanup_policy:
 bad_fork_cleanup_cgroup:
 #endif
        if (clone_flags & CLONE_THREAD)
-               threadgroup_fork_read_unlock(current);
-       cgroup_exit(p, cgroup_callbacks_done);
+               threadgroup_change_end(current);
+       cgroup_exit(p, 0);
        delayacct_tsk_free(p);
        module_put(task_thread_info(p)->exec_domain->module);
 bad_fork_cleanup_count:
@@ -1421,12 +1509,6 @@ fork_out:
        return ERR_PTR(retval);
 }
 
-noinline struct pt_regs * __cpuinit __attribute__((weak)) idle_regs(struct pt_regs *regs)
-{
-       memset(regs, 0, sizeof(struct pt_regs));
-       return regs;
-}
-
 static inline void init_idle_pids(struct pid_link *links)
 {
        enum pid_type type;
@@ -1440,10 +1522,7 @@ static inline void init_idle_pids(struct pid_link *links)
 struct task_struct * __cpuinit fork_idle(int cpu)
 {
        struct task_struct *task;
-       struct pt_regs regs;
-
-       task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL,
-                           &init_struct_pid, 0);
+       task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0);
        if (!IS_ERR(task)) {
                init_idle_pids(task->pids);
                init_idle(task, cpu);
@@ -1460,7 +1539,6 @@ struct task_struct * __cpuinit fork_idle(int cpu)
  */
 long do_fork(unsigned long clone_flags,
              unsigned long stack_start,
-             struct pt_regs *regs,
              unsigned long stack_size,
              int __user *parent_tidptr,
              int __user *child_tidptr)
@@ -1490,7 +1568,7 @@ long do_fork(unsigned long clone_flags,
         * requested, no event is reported; otherwise, report if the event
         * for the type of forking is enabled.
         */
-       if (likely(user_mode(regs)) && !(clone_flags & CLONE_UNTRACED)) {
+       if (!(clone_flags & CLONE_UNTRACED)) {
                if (clone_flags & CLONE_VFORK)
                        trace = PTRACE_EVENT_VFORK;
                else if ((clone_flags & CSIGNAL) != SIGCHLD)
@@ -1502,7 +1580,7 @@ long do_fork(unsigned long clone_flags,
                        trace = 0;
        }
 
-       p = copy_process(clone_flags, stack_start, regs, stack_size,
+       p = copy_process(clone_flags, stack_start, stack_size,
                         child_tidptr, NULL, trace);
        /*
         * Do this prior waking up the new thread - the thread pointer
@@ -1521,18 +1599,9 @@ long do_fork(unsigned long clone_flags,
                if (clone_flags & CLONE_VFORK) {
                        p->vfork_done = &vfork;
                        init_completion(&vfork);
+                       get_task_struct(p);
                }
 
-               audit_finish_fork(p);
-
-               /*
-                * We set PF_STARTING at creation in case tracing wants to
-                * use this to distinguish a fully live task from one that
-                * hasn't finished SIGSTOP raising yet.  Now we clear it
-                * and set the child going.
-                */
-               p->flags &= ~PF_STARTING;
-
                wake_up_new_task(p);
 
                /* forking complete and child started to run, tell ptracer */
@@ -1540,10 +1609,8 @@ long do_fork(unsigned long clone_flags,
                        ptrace_event(trace, nr);
 
                if (clone_flags & CLONE_VFORK) {
-                       freezer_do_not_count();
-                       wait_for_completion(&vfork);
-                       freezer_count();
-                       ptrace_event(PTRACE_EVENT_VFORK_DONE, nr);
+                       if (!wait_for_vfork_done(p, &vfork))
+                               ptrace_event(PTRACE_EVENT_VFORK_DONE, nr);
                }
        } else {
                nr = PTR_ERR(p);
@@ -1551,6 +1618,60 @@ long do_fork(unsigned long clone_flags,
        return nr;
 }
 
+#ifdef CONFIG_GENERIC_KERNEL_THREAD
+/*
+ * Create a kernel thread.
+ */
+pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
+{
+       return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn,
+               (unsigned long)arg, NULL, NULL);
+}
+#endif
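
Under CONFIG_GENERIC_KERNEL_THREAD the caller hands do_fork() a function pointer and its argument in the stack_start/stack_size slots instead of a pt_regs blob, as the wrapper above shows. A minimal sketch of direct use (my_worker and start_worker are hypothetical; in-tree code almost always goes through kthread_create()/kthread_run() instead):

static int my_worker(void *data)
{
        pr_info("my_worker: data=%p\n", data);
        return 0;                       /* normal thread exit */
}

static void start_worker(void)
{
        /* fn and arg travel in the stack_start/stack_size slots */
        pid_t pid = kernel_thread(my_worker, NULL,
                                  CLONE_FS | CLONE_FILES | SIGCHLD);
        if (pid < 0)
                pr_err("kernel_thread failed: %d\n", pid);
}
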
+
+#ifdef __ARCH_WANT_SYS_FORK
+SYSCALL_DEFINE0(fork)
+{
+#ifdef CONFIG_MMU
+       return do_fork(SIGCHLD, 0, 0, NULL, NULL);
+#else
+       /* can not support in nommu mode */
+       return(-EINVAL);
+#endif
+}
+#endif
+
+#ifdef __ARCH_WANT_SYS_VFORK
+SYSCALL_DEFINE0(vfork)
+{
+       return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0,
+                       0, NULL, NULL);
+}
+#endif
+
+#ifdef __ARCH_WANT_SYS_CLONE
+#ifdef CONFIG_CLONE_BACKWARDS
+SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
+                int __user *, parent_tidptr,
+                int, tls_val,
+                int __user *, child_tidptr)
+#elif defined(CONFIG_CLONE_BACKWARDS2)
+SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags,
+                int __user *, parent_tidptr,
+                int __user *, child_tidptr,
+                int, tls_val)
+#else
+SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
+                int __user *, parent_tidptr,
+                int __user *, child_tidptr,
+                int, tls_val)
+#endif
+{
+       return do_fork(clone_flags, newsp, 0,
+               parent_tidptr, child_tidptr);
+}
+#endif
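
The three SYSCALL_DEFINE5 variants exist because architectures froze incompatible argument orders for raw clone(2); userspace never sees the difference because the libc wrapper normalizes it. A userspace sketch using glibc's clone() (standard API; stack sizing and error handling simplified):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>

static int child_fn(void *arg)
{
        printf("child: got \"%s\"\n", (char *)arg);
        return 0;
}

int main(void)
{
        size_t sz = 64 * 1024;
        char *stack = malloc(sz);

        /* glibc's clone() hides the per-arch argument-order quirks the
         * CONFIG_CLONE_BACKWARDS* variants above preserve at the
         * syscall boundary; the stack grows down on most arches, so
         * pass its top. */
        pid_t pid = clone(child_fn, stack + sz, CLONE_VM | SIGCHLD,
                          (void *)"hello");
        if (pid < 0) {
                perror("clone");
                return 1;
        }
        waitpid(pid, NULL, 0);
        free(stack);
        return 0;
}
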
+
 #ifndef ARCH_MIN_MMSTRUCT_ALIGN
 #define ARCH_MIN_MMSTRUCT_ALIGN 0
 #endif