Merge branch 'core-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...

[linux-2.6.git] / include / linux / sched.h
diff --git a/include/linux/sched.h b/include/linux/sched.h

index 5a2ab3c2757da6ce2bfa2153d42cf1041f828321..2234985a5e6546a7476cfb7801032d14dd524c0c 100644 (file)
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -220,7 +220,7 @@ extern char ___assert_task_state[1 - 2*!!(
                         ((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
  #define task_contributes_to_load(task) \
                                 ((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
-                                (task->flags & PF_FREEZING) == 0)
+                                (task->flags & PF_FROZEN) == 0)
  
  #define __set_task_state(tsk, state_value)             \
         do { (tsk)->state = (state_value); } while (0)
@@ -637,13 +637,15 @@ struct signal_struct {
  #endif
  #ifdef CONFIG_CGROUPS
         /*
-        * The threadgroup_fork_lock prevents threads from forking with
-        * CLONE_THREAD while held for writing. Use this for fork-sensitive
-        * threadgroup-wide operations. It's taken for reading in fork.c in
-        * copy_process().
-        * Currently only needed write-side by cgroups.
+        * group_rwsem prevents new tasks from entering the threadgroup and
+        * member tasks from exiting, or more specifically, setting of
+        * PF_EXITING.  fork and exit paths are protected with this rwsem
+        * using threadgroup_change_begin/end().  Users which require
+        * threadgroup to remain stable should use threadgroup_[un]lock()
+        * which also takes care of exec path.  Currently, cgroup is the
+        * only user.
          */
-       struct rw_semaphore threadgroup_fork_lock;
+       struct rw_semaphore group_rwsem;
  #endif
  
         int oom_adj;            /* OOM kill score adjustment (bit shift) */
@@ -1542,6 +1544,7 @@ struct task_struct {
          */
         int nr_dirtied;
         int nr_dirtied_pause;
+       unsigned long dirty_paused_when; /* start of a write-and-pause period */
  
  #ifdef CONFIG_LATENCYTOP
         int latency_record_count;
@@ -1787,7 +1790,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
  #define PF_MEMALLOC    0x00000800      /* Allocating memory */
  #define PF_NPROC_EXCEEDED 0x00001000   /* set_user noticed that RLIMIT_NPROC was exceeded */
  #define PF_USED_MATH   0x00002000      /* if unset the fpu must be initialized before use */
-#define PF_FREEZING    0x00004000      /* freeze in progress. do not account to load */
  #define PF_NOFREEZE    0x00008000      /* this thread should not be frozen */
  #define PF_FROZEN      0x00010000      /* frozen for system suspend */
  #define PF_FSTRANS     0x00020000      /* inside a filesystem transaction */
@@ -1803,7 +1805,6 @@ extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *
  #define PF_MEMPOLICY   0x10000000      /* Non-default NUMA mempolicy */
  #define PF_MUTEX_TESTER        0x20000000      /* Thread belongs to the rt mutex tester */
  #define PF_FREEZER_SKIP        0x40000000      /* Freezer should not count it as freezable */
-#define PF_FREEZER_NOSIG 0x80000000    /* Freezer won't send signals to it */
  
  /*
   * Only the _current_ task can read/write to tsk->flags, but other
@@ -2085,6 +2086,14 @@ extern int sched_setscheduler(struct task_struct *, int,
  extern int sched_setscheduler_nocheck(struct task_struct *, int,
                                       const struct sched_param *);
  extern struct task_struct *idle_task(int cpu);
+/**
+ * is_idle_task - is the specified task an idle task?
+ * @p: the task in question.
+ */
+static inline bool is_idle_task(const struct task_struct *p)
+{
+       return p->pid == 0;
+}
  extern struct task_struct *curr_task(int cpu);
  extern void set_curr_task(int cpu, struct task_struct *p);
  
@@ -2266,7 +2275,7 @@ extern void __cleanup_sighand(struct sighand_struct *);
  extern void exit_itimers(struct signal_struct *);
  extern void flush_itimer_signals(void);
  
-extern NORET_TYPE void do_group_exit(int);
+extern void do_group_exit(int);
  
  extern void daemonize(const char *, ...);
  extern int allow_signal(int);
@@ -2388,29 +2397,62 @@ static inline void unlock_task_sighand(struct task_struct *tsk,
         spin_unlock_irqrestore(&tsk->sighand->siglock, *flags);
  }
  
-/* See the declaration of threadgroup_fork_lock in signal_struct. */
  #ifdef CONFIG_CGROUPS
-static inline void threadgroup_fork_read_lock(struct task_struct *tsk)
+static inline void threadgroup_change_begin(struct task_struct *tsk)
  {
-       down_read(&tsk->signal->threadgroup_fork_lock);
+       down_read(&tsk->signal->group_rwsem);
  }
-static inline void threadgroup_fork_read_unlock(struct task_struct *tsk)
+static inline void threadgroup_change_end(struct task_struct *tsk)
  {
-       up_read(&tsk->signal->threadgroup_fork_lock);
+       up_read(&tsk->signal->group_rwsem);
  }
-static inline void threadgroup_fork_write_lock(struct task_struct *tsk)
+
+/**
+ * threadgroup_lock - lock threadgroup
+ * @tsk: member task of the threadgroup to lock
+ *
+ * Lock the threadgroup @tsk belongs to.  No new task is allowed to enter
+ * and member tasks aren't allowed to exit (as indicated by PF_EXITING) or
+ * perform exec.  This is useful for cases where the threadgroup needs to
+ * stay stable across blockable operations.
+ *
+ * fork and exit paths explicitly call threadgroup_change_{begin|end}() for
+ * synchronization.  While held, no new task will be added to threadgroup
+ * and no existing live task will have its PF_EXITING set.
+ *
+ * During exec, a task goes and puts its thread group through unusual
+ * changes.  After de-threading, exclusive access is assumed to resources
+ * which are usually shared by tasks in the same group - e.g. sighand may
+ * be replaced with a new one.  Also, the exec'ing task takes over group
+ * leader role including its pid.  Exclude these changes while locked by
+ * grabbing cred_guard_mutex which is used to synchronize exec path.
+ */
+static inline void threadgroup_lock(struct task_struct *tsk)
  {
-       down_write(&tsk->signal->threadgroup_fork_lock);
+       /*
+        * exec uses exit for de-threading, nesting group_rwsem inside
+        * cred_guard_mutex. Grab cred_guard_mutex first.
+        */
+       mutex_lock(&tsk->signal->cred_guard_mutex);
+       down_write(&tsk->signal->group_rwsem);
  }
-static inline void threadgroup_fork_write_unlock(struct task_struct *tsk)
+
+/**
+ * threadgroup_unlock - unlock threadgroup
+ * @tsk: member task of the threadgroup to unlock
+ *
+ * Reverse threadgroup_lock().
+ */
+static inline void threadgroup_unlock(struct task_struct *tsk)
  {
-       up_write(&tsk->signal->threadgroup_fork_lock);
+       up_write(&tsk->signal->group_rwsem);
+       mutex_unlock(&tsk->signal->cred_guard_mutex);
  }
  #else
-static inline void threadgroup_fork_read_lock(struct task_struct *tsk) {}
-static inline void threadgroup_fork_read_unlock(struct task_struct *tsk) {}
-static inline void threadgroup_fork_write_lock(struct task_struct *tsk) {}
-static inline void threadgroup_fork_write_unlock(struct task_struct *tsk) {}
+static inline void threadgroup_change_begin(struct task_struct *tsk) {}
+static inline void threadgroup_change_end(struct task_struct *tsk) {}
+static inline void threadgroup_lock(struct task_struct *tsk) {}
+static inline void threadgroup_unlock(struct task_struct *tsk) {}
  #endif
  
  #ifndef __HAVE_THREAD_FUNCTIONS