#define CLONE_NEWUTS 0x04000000 /* New utsname group? */
#define CLONE_NEWIPC 0x08000000 /* New ipcs */
#define CLONE_NEWUSER 0x10000000 /* New user namespace */
+#define CLONE_NEWPID 0x20000000 /* New pid namespace */
#define CLONE_NEWNET 0x40000000 /* New network namespace */
+#define CLONE_IO 0x80000000 /* Clone io context */
/*
* Scheduling policies
#include <linux/mm_types.h>
#include <asm/system.h>
-#include <asm/semaphore.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/cputime.h>
#include <linux/proportions.h>
#include <linux/seccomp.h>
#include <linux/rcupdate.h>
-#include <linux/futex.h>
#include <linux/rtmutex.h>
#include <linux/time.h>
#include <linux/hrtimer.h>
#include <linux/task_io_accounting.h>
#include <linux/kobject.h>
+#include <linux/latencytop.h>
#include <asm/processor.h>
+struct mem_cgroup;
struct exec_domain;
struct futex_pi_state;
+struct robust_list_head;
struct bio;
/*
#define TASK_RUNNING 0
#define TASK_INTERRUPTIBLE 1
#define TASK_UNINTERRUPTIBLE 2
-#define TASK_STOPPED 4
-#define TASK_TRACED 8
+#define __TASK_STOPPED 4
+#define __TASK_TRACED 8
/* in tsk->exit_state */
#define EXIT_ZOMBIE 16
#define EXIT_DEAD 32
/* in tsk->state again */
#define TASK_DEAD 64
+#define TASK_WAKEKILL 128 /* OR-ed into TASK_KILLABLE, TASK_STOPPED and TASK_TRACED below */
+
+/*
+ * Convenience macros for the sake of set_task_state: each one combines
+ * TASK_WAKEKILL with one base state bit.
+ */
+#define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
+#define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED)
+#define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED)
+
+/*
+ * Convenience macros for the sake of wake_up: masks built from base state
+ * bits only; TASK_WAKEKILL is not part of either mask.
+ */
+#define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)
+#define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED)
+
+/*
+ * get_task_state(): mask of state bits, presumably the ones that function
+ * reports (it is not visible in this chunk - confirm).  TASK_RUNNING is 0,
+ * so it contributes no bit to the mask; TASK_WAKEKILL is not included.
+ */
+#define TASK_REPORT (TASK_RUNNING | TASK_INTERRUPTIBLE | \
+ TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
+ __TASK_TRACED)
+
+/*
+ * Predicates on (task)->state.
+ *
+ * They test the raw __TASK_STOPPED / __TASK_TRACED bits, so they match both
+ * the bare bit and the composite TASK_STOPPED / TASK_TRACED values that also
+ * carry TASK_WAKEKILL.  task_contributes_to_load() tests
+ * TASK_UNINTERRUPTIBLE, which is also part of TASK_KILLABLE.
+ *
+ * @task: pointer expression; it is parenthesized in the expansion so that
+ *        arguments such as "&t" or "a ? b : c" bind correctly.  It is
+ *        evaluated exactly once per macro.
+ * Each macro evaluates to 1 when the tested bit(s) are set, 0 otherwise.
+ */
+#define task_is_traced(task)	(((task)->state & __TASK_TRACED) != 0)
+#define task_is_stopped(task)	(((task)->state & __TASK_STOPPED) != 0)
+#define task_is_stopped_or_traced(task)	\
+	(((task)->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
+#define task_contributes_to_load(task)	\
+	(((task)->state & TASK_UNINTERRUPTIBLE) != 0)
#define __set_task_state(tsk, state_value) \
do { (tsk)->state = (state_value); } while (0)
extern void sched_init(void);
extern void sched_init_smp(void);
+extern asmlinkage void schedule_tail(struct task_struct *prev);
extern void init_idle(struct task_struct *idle, int cpu);
extern void init_idle_bootup_task(struct task_struct *idle);
}
#endif
+extern unsigned long rt_needs_cpu(int cpu);
+
/*
* Only dump TASK_* tasks. (0 for all tasks)
*/
extern void cpu_init (void);
extern void trap_init(void);
+extern void account_process_tick(struct task_struct *task, int user);
extern void update_process_times(int user);
extern void scheduler_tick(void);
+extern void hrtick_resched(void);
+
+extern void sched_show_task(struct task_struct *p);
#ifdef CONFIG_DETECT_SOFTLOCKUP
extern void softlockup_tick(void);
extern void spawn_softlockup_task(void);
extern void touch_softlockup_watchdog(void);
extern void touch_all_softlockup_watchdogs(void);
-extern int softlockup_thresh;
+extern unsigned long softlockup_thresh;
+extern unsigned long sysctl_hung_task_check_count;
+extern unsigned long sysctl_hung_task_timeout_secs;
+extern unsigned long sysctl_hung_task_warnings;
#else
static inline void softlockup_tick(void)
{
/* Attach to any functions which should be ignored in wchan output. */
#define __sched __attribute__((__section__(".sched.text")))
+
+/* Linker adds these: start and end of __sched functions */
+extern char __sched_text_start[], __sched_text_end[];
+
/* Is this address in the __sched functions? */
extern int in_sched_functions(unsigned long addr);
#define MAX_SCHEDULE_TIMEOUT LONG_MAX
-extern signed long FASTCALL(schedule_timeout(signed long timeout));
+extern signed long schedule_timeout(signed long timeout);
extern signed long schedule_timeout_interruptible(signed long timeout);
+extern signed long schedule_timeout_killable(signed long timeout);
extern signed long schedule_timeout_uninterruptible(signed long timeout);
asmlinkage void schedule(void);
/* ITIMER_REAL timer for the process */
struct hrtimer real_timer;
- struct task_struct *tsk;
+ struct pid *leader_pid;
ktime_t it_real_incr;
/* ITIMER_PROF and ITIMER_VIRTUAL timers for the process */
cputime_t it_prof_incr, it_virt_incr;
/* job control IDs */
- pid_t pgrp;
+
+ /*
+ * The pgrp and session fields are deprecated.
+ * Use the task_session_*nr() and task_pgrp_*nr() routines below instead.
+ */
+
+ union {
+ pid_t pgrp __deprecated;
+ pid_t __pgrp;
+ };
+
struct pid *tty_old_pgrp;
union {
#define SIGNAL_STOP_CONTINUED 0x00000004 /* SIGCONT since WCONTINUED reap */
#define SIGNAL_GROUP_EXIT 0x00000008 /* group exit in progress */
+/*
+ * signal_group_exit - test whether a group exit is under way for @sig.
+ *
+ * Returns 1 when SIGNAL_GROUP_EXIT is set in sig->flags or when
+ * sig->group_exit_task is non-NULL, 0 otherwise.
+ * If true, all threads except ->group_exit_task have pending SIGKILL.
+ */
+static inline int signal_group_exit(const struct signal_struct *sig)
+{
+	if (sig->flags & SIGNAL_GROUP_EXIT)
+		return 1;
+
+	return sig->group_exit_task != NULL;
+}
+
/*
* Some day this will be a full-fledged user tracking system..
*/
struct hlist_node uidhash_node;
uid_t uid;
-#ifdef CONFIG_FAIR_USER_SCHED
+#ifdef CONFIG_USER_SCHED
struct task_group *tg;
#ifdef CONFIG_SYSFS
- struct kset kset;
- struct subsys_attribute user_attr;
+ struct kobject kobj;
struct work_struct work;
#endif
#endif
};
-#ifdef CONFIG_FAIR_USER_SCHED
-extern int uids_kobject_init(void);
-#else
-static inline int uids_kobject_init(void) { return 0; }
-#endif
+extern int uids_sysfs_init(void);
extern struct user_struct *find_user(uid_t);
#endif
};
+extern void partition_sched_domains(int ndoms_new, cpumask_t *doms_new);
+extern int arch_reinit_sched_domains(void);
+
#endif /* CONFIG_SMP */
/*
struct io_context; /* See blkdev.h */
#define NGROUPS_SMALL 32
-#define NGROUPS_PER_BLOCK ((int)(PAGE_SIZE / sizeof(gid_t)))
+#define NGROUPS_PER_BLOCK ((unsigned int)(PAGE_SIZE / sizeof(gid_t)))
struct group_info {
int ngroups;
atomic_t usage;
void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup);
void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);
void (*yield_task) (struct rq *rq);
+ int (*select_task_rq)(struct task_struct *p, int sync);
void (*check_preempt_curr) (struct rq *rq, struct task_struct *p);
struct task_struct * (*pick_next_task) (struct rq *rq);
void (*put_prev_task) (struct rq *rq, struct task_struct *p);
+#ifdef CONFIG_SMP
unsigned long (*load_balance) (struct rq *this_rq, int this_cpu,
- struct rq *busiest,
- unsigned long max_nr_move, unsigned long max_load_move,
+ struct rq *busiest, unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned, int *this_best_prio);
+ int (*move_one_task) (struct rq *this_rq, int this_cpu,
+ struct rq *busiest, struct sched_domain *sd,
+ enum cpu_idle_type idle);
+ void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
+ void (*post_schedule) (struct rq *this_rq);
+ void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);
+#endif
+
void (*set_curr_task) (struct rq *rq);
- void (*task_tick) (struct rq *rq, struct task_struct *p);
+ void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
void (*task_new) (struct rq *rq, struct task_struct *p);
+ void (*set_cpus_allowed)(struct task_struct *p, cpumask_t *newmask);
+
+ void (*join_domain)(struct rq *rq);
+ void (*leave_domain)(struct rq *rq);
+
+ void (*switched_from) (struct rq *this_rq, struct task_struct *task,
+ int running);
+ void (*switched_to) (struct rq *this_rq, struct task_struct *task,
+ int running);
+ void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
+ int oldprio, int running);
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+ void (*moved_group) (struct task_struct *p);
+#endif
};
struct load_weight {
struct load_weight load; /* for load-balancing */
struct rb_node run_node;
unsigned int on_rq;
- int peer_preempt;
u64 exec_start;
u64 sum_exec_runtime;
u64 vruntime;
u64 prev_sum_exec_runtime;
+ u64 last_wakeup;
+ u64 avg_overlap;
+
#ifdef CONFIG_SCHEDSTATS
u64 wait_start;
u64 wait_max;
+ u64 wait_count;
+ u64 wait_sum;
u64 sleep_start;
u64 sleep_max;
#endif
};
+struct sched_rt_entity { /* this patch also adds a member "struct sched_rt_entity rt" next to "se" */
+ struct list_head run_list; /* same name and type as the run_list this patch removes elsewhere */
+ unsigned int time_slice; /* same name and type as the time_slice this patch removes elsewhere */
+ unsigned long timeout; /* NOTE(review): units and meaning are not visible in this chunk */
+ int nr_cpus_allowed; /* presumably the number of CPUs in cpus_allowed - TODO confirm */
+
+#ifdef CONFIG_RT_GROUP_SCHED
+ struct sched_rt_entity *parent; /* presumably the entity of the enclosing group - TODO confirm */
+ /* rq on which this entity is (to be) queued: */
+ struct rt_rq *rt_rq;
+ /* rq "owned" by this entity/group: */
+ struct rt_rq *my_q;
+#endif
+};
+
struct task_struct {
volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
void *stack;
#endif
int prio, static_prio, normal_prio;
- struct list_head run_list;
const struct sched_class *sched_class;
struct sched_entity se;
+ struct sched_rt_entity rt;
#ifdef CONFIG_PREEMPT_NOTIFIERS
/* list of struct preempt_notifier: */
struct hlist_head preempt_notifiers;
#endif
- unsigned short ioprio;
/*
* fpu_counter contains the number of consecutive context switches
* that the FPU is used. If this is over a threshold, the lazy fpu
unsigned int policy;
cpumask_t cpus_allowed;
- unsigned int time_slice;
+
+#ifdef CONFIG_PREEMPT_RCU
+ int rcu_read_lock_nesting;
+ int rcu_flipctr_idx;
+#endif /* #ifdef CONFIG_PREEMPT_RCU */
#if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
struct sched_info sched_info;
unsigned int rt_priority;
cputime_t utime, stime, utimescaled, stimescaled;
cputime_t gtime;
+ cputime_t prev_utime, prev_stime;
unsigned long nvcsw, nivcsw; /* context switch counts */
struct timespec start_time; /* monotonic time */
struct timespec real_start_time; /* boot based time */
uid_t uid,euid,suid,fsuid;
gid_t gid,egid,sgid,fsgid;
struct group_info *group_info;
- kernel_cap_t cap_effective, cap_inheritable, cap_permitted;
+ kernel_cap_t cap_effective, cap_inheritable, cap_permitted, cap_bset;
unsigned keep_capabilities:1;
struct user_struct *user;
#ifdef CONFIG_KEYS
/* ipc stuff */
struct sysv_sem sysvsem;
#endif
+#ifdef CONFIG_DETECT_SOFTLOCKUP
+/* hung task detection */
+ unsigned long last_switch_timestamp;
+ unsigned long last_switch_count;
+#endif
/* CPU-specific state of this task */
struct thread_struct thread;
/* filesystem information */
void *security;
#endif
struct audit_context *audit_context;
+#ifdef CONFIG_AUDITSYSCALL
+ uid_t loginuid;
+ unsigned int sessionid;
+#endif
seccomp_t seccomp;
/* Thread group tracking */
int softirq_context;
#endif
#ifdef CONFIG_LOCKDEP
-# define MAX_LOCK_DEPTH 30UL
+# define MAX_LOCK_DEPTH 48UL
u64 curr_chain_key;
int lockdep_depth;
struct held_lock held_locks[MAX_LOCK_DEPTH];
int make_it_fail;
#endif
struct prop_local_single dirties;
+#ifdef CONFIG_LATENCYTOP
+ int latency_record_count;
+ struct latency_record latency_record[LT_SAVECOUNT];
+#endif
};
/*
tsk->signal->__session = session;
}
+/*
+ * set_task_pgrp - store @pgrp as the process-group number of @tsk.
+ *
+ * The value is written through tsk->signal->__pgrp, the member that shares
+ * a union with the __deprecated "pgrp" name.
+ */
+static inline void set_task_pgrp(struct task_struct *tsk, pid_t pgrp)
+{
+	pid_t *slot = &tsk->signal->__pgrp;
+
+	*slot = pgrp;
+}
+
static inline struct pid *task_pid(struct task_struct *task)
{
return task->pids[PIDTYPE_PID].pid;
* from various namespaces
*
* task_xid_nr() : global id, i.e. the id seen from the init namespace;
- * task_xid_vnr() : virtual id, i.e. the id seen from the namespace the task
- * belongs to. this only makes sence when called in the
- * context of the task that belongs to the same namespace;
+ * task_xid_vnr() : virtual id, i.e. the id seen from the pid namespace of
+ * current.
* task_xid_nr_ns() : id seen from the ns specified;
*
* set_task_vxid() : assigns a virtual id to a task;
*
- * task_ppid_nr_ns() : the parent's id as seen from the namespace specified.
- * the result depends on the namespace and whether the
- * task in question is the namespace's init. e.g. for the
- * namespace's init this will return 0 when called from
- * the namespace of this init, or appropriate id otherwise.
- *
- *
* see also pid_nr() etc in include/linux/pid.h
*/
return tsk->pid;
}
-static inline pid_t task_pid_nr_ns(struct task_struct *tsk,
- struct pid_namespace *ns)
-{
- return pid_nr_ns(task_pid(tsk), ns);
-}
+pid_t task_pid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
static inline pid_t task_pid_vnr(struct task_struct *tsk)
{
return tsk->tgid;
}
-static inline pid_t task_tgid_nr_ns(struct task_struct *tsk,
- struct pid_namespace *ns)
-{
- return pid_nr_ns(task_tgid(tsk), ns);
-}
+pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
static inline pid_t task_tgid_vnr(struct task_struct *tsk)
{
static inline pid_t task_pgrp_nr(struct task_struct *tsk)
{
- return tsk->signal->pgrp;
+ return tsk->signal->__pgrp;
}
-static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk,
- struct pid_namespace *ns)
-{
- return pid_nr_ns(task_pgrp(tsk), ns);
-}
+pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
static inline pid_t task_pgrp_vnr(struct task_struct *tsk)
{
return tsk->signal->__session;
}
-static inline pid_t task_session_nr_ns(struct task_struct *tsk,
- struct pid_namespace *ns)
-{
- return pid_nr_ns(task_session(tsk), ns);
-}
+pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);
static inline pid_t task_session_vnr(struct task_struct *tsk)
{
}
-static inline pid_t task_ppid_nr_ns(struct task_struct *tsk,
- struct pid_namespace *ns)
-{
- return pid_nr_ns(task_pid(rcu_dereference(tsk->real_parent)), ns);
-}
-
/**
* pid_alive - check that a task structure is not stale
* @p: Task structure to be checked.
* @tsk: Task structure to be checked.
*
* Check if a task structure is the first user space task the kernel created.
- *
- * TODO: We should inline this function after some cleanups in pid_namespace.h
*/
-extern int is_global_init(struct task_struct *tsk);
+static inline int is_global_init(struct task_struct *tsk)
+{
+	/* The only criterion is the task's ->pid field being 1. */
+	if (tsk->pid != 1)
+		return 0;
+
+	return 1;
+}
/*
* is_container_init:
* check whether in the task is init in its own pid namespace.
*/
-static inline int is_container_init(struct task_struct *tsk)
-{
- return tsk->pid == 1;
-}
+extern int is_container_init(struct task_struct *tsk);
extern struct pid *cad_pid;
extern void sched_idle_next(void);
+/*
+ * wake_up_idle_cpu() is declared as an external function only when both
+ * CONFIG_NO_HZ and CONFIG_SMP are defined; every other configuration gets
+ * an empty inline stub.
+ */
+#if !defined(CONFIG_NO_HZ) || !defined(CONFIG_SMP)
+static inline void wake_up_idle_cpu(int cpu)
+{
+}
+#else
+extern void wake_up_idle_cpu(int cpu);
+#endif
+
#ifdef CONFIG_SCHED_DEBUG
extern unsigned int sysctl_sched_latency;
-extern unsigned int sysctl_sched_nr_latency;
+extern unsigned int sysctl_sched_min_granularity;
extern unsigned int sysctl_sched_wakeup_granularity;
extern unsigned int sysctl_sched_batch_wakeup_granularity;
extern unsigned int sysctl_sched_child_runs_first;
extern unsigned int sysctl_sched_features;
extern unsigned int sysctl_sched_migration_cost;
+extern unsigned int sysctl_sched_nr_migrate;
+
+int sched_nr_latency_handler(struct ctl_table *table, int write,
+ struct file *file, void __user *buffer, size_t *length,
+ loff_t *ppos);
#endif
+extern unsigned int sysctl_sched_rt_period;
+extern int sysctl_sched_rt_runtime;
extern unsigned int sysctl_sched_compat_yield;
extern struct mm_struct init_mm;
-#define find_task_by_pid(nr) find_task_by_pid_type(PIDTYPE_PID, nr)
-extern struct task_struct *find_task_by_pid_type(int type, int pid);
-extern void __set_special_pids(pid_t session, pid_t pgrp);
+extern struct pid_namespace init_pid_ns;
+
+/*
+ * find a task by one of its numerical ids
+ *
+ * find_task_by_pid_type_ns():
+ * the most generic call - it finds a task by the id, the id type
+ * and the namespace specified
+ * find_task_by_pid_ns():
+ * finds a task by its pid in the specified namespace
+ * find_task_by_vpid():
+ * finds a task by its virtual pid
+ * find_task_by_pid():
+ * finds a task by its global pid
+ *
+ * see also find_pid() etc in include/linux/pid.h
+ */
+
+extern struct task_struct *find_task_by_pid_type_ns(int type, int pid,
+ struct pid_namespace *ns);
+
+extern struct task_struct *find_task_by_pid(pid_t nr);
+extern struct task_struct *find_task_by_vpid(pid_t nr);
+extern struct task_struct *find_task_by_pid_ns(pid_t nr,
+ struct pid_namespace *ns);
+
+extern void __set_special_pids(struct pid *pid);
/* per-UID process charging. */
extern struct user_struct * alloc_uid(struct user_namespace *, uid_t);
extern void do_timer(unsigned long ticks);
-extern int FASTCALL(wake_up_state(struct task_struct * tsk, unsigned int state));
-extern int FASTCALL(wake_up_process(struct task_struct * tsk));
-extern void FASTCALL(wake_up_new_task(struct task_struct * tsk,
- unsigned long clone_flags));
+extern int wake_up_state(struct task_struct *tsk, unsigned int state);
+extern int wake_up_process(struct task_struct *tsk);
+extern void wake_up_new_task(struct task_struct *tsk,
+ unsigned long clone_flags);
#ifdef CONFIG_SMP
extern void kick_process(struct task_struct *tsk);
#else
extern void unblock_all_signals(void);
extern void release_task(struct task_struct * p);
extern int send_sig_info(int, struct siginfo *, struct task_struct *);
-extern int send_group_sig_info(int, struct siginfo *, struct task_struct *);
extern int force_sigsegv(int, struct task_struct *);
extern int force_sig_info(int, struct siginfo *, struct task_struct *);
extern int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp);
-extern int kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp);
extern int kill_pid_info(int sig, struct siginfo *info, struct pid *pid);
extern int kill_pid_info_as_uid(int, struct siginfo *, struct pid *, uid_t, uid_t, u32);
extern int kill_pgrp(struct pid *pid, int sig, int priv);
extern struct mm_struct * mm_alloc(void);
/* mmdrop drops the mm and the page tables */
-extern void FASTCALL(__mmdrop(struct mm_struct *));
+extern void __mmdrop(struct mm_struct *);
static inline void mmdrop(struct mm_struct * mm)
{
if (unlikely(atomic_dec_and_test(&mm->mm_count)))
struct task_struct *fork_idle(int);
extern void set_task_comm(struct task_struct *tsk, char *from);
-extern void get_task_comm(char *to, struct task_struct *tsk);
+extern char *get_task_comm(char *to, struct task_struct *tsk);
#ifdef CONFIG_SMP
extern void wait_task_inactive(struct task_struct * p);
return p->pid == p->tgid;
}
+/* same_thread_group - returns 1 when @p1 and @p2 have equal ->tgid, else 0. */
+static inline int
+same_thread_group(struct task_struct *p1, struct task_struct *p2)
+{
+	if (p1->tgid != p2->tgid)
+		return 0;
+
+	return 1;
+}
+
static inline struct task_struct *next_thread(const struct task_struct *p)
{
return list_entry(rcu_dereference(p->thread_group.next),
{
return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING));
}
-
+
+extern int __fatal_signal_pending(struct task_struct *p);
+
+/*
+ * fatal_signal_pending - returns 1 when signal_pending(@p) is true and
+ * __fatal_signal_pending(@p) is non-zero, 0 otherwise.
+ *
+ * signal_pending() is checked first, so the external
+ * __fatal_signal_pending() is only called when it returned true.
+ */
+static inline int fatal_signal_pending(struct task_struct *p)
+{
+	if (!signal_pending(p))
+		return 0;
+
+	return __fatal_signal_pending(p) != 0;
+}
+
static inline int need_resched(void)
{
return unlikely(test_thread_flag(TIF_NEED_RESCHED));
* cond_resched_lock() will drop the spinlock before scheduling,
* cond_resched_softirq() will enable bhs before scheduling.
*/
-extern int cond_resched(void);
-extern int cond_resched_lock(spinlock_t * lock);
-extern int cond_resched_softirq(void);
-
-/*
- * Does a critical section need to be broken due to another
- * task waiting?:
- */
-#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP)
-# define need_lockbreak(lock) ((lock)->break_lock)
+#ifdef CONFIG_PREEMPT
+static inline int cond_resched(void) /* CONFIG_PREEMPT build: stub that always returns 0 */
+{
+ return 0;
+}
#else
-# define need_lockbreak(lock) 0
+extern int _cond_resched(void); /* defined outside this header */
+static inline int cond_resched(void) /* other builds: forwards to _cond_resched() and returns its result */
+{
+ return _cond_resched();
+}
#endif
+extern int cond_resched_lock(spinlock_t * lock);
+extern int cond_resched_softirq(void);
/*
* Does a critical section need to be broken due to another
- * task waiting or preemption being signalled:
+ * task waiting?: (technically does not depend on CONFIG_PREEMPT,
+ * but a general need for low latency)
*/
-static inline int lock_need_resched(spinlock_t *lock)
+static inline int spin_needbreak(spinlock_t *lock) /* takes the place of lock_need_resched(); need_resched() is no longer consulted here */
{
- if (need_lockbreak(lock) || need_resched())
- return 1;
+#ifdef CONFIG_PREEMPT
+ return spin_is_contended(lock); /* CONFIG_PREEMPT: whatever spin_is_contended() reports for @lock */
+#else
return 0;
+#endif
}
/*
extern void normalize_rt_tasks(void);
-#ifdef CONFIG_FAIR_GROUP_SCHED
+#ifdef CONFIG_GROUP_SCHED
extern struct task_group init_task_group;
extern struct task_group *sched_create_group(void);
extern void sched_destroy_group(struct task_group *tg);
extern void sched_move_task(struct task_struct *tsk);
+#ifdef CONFIG_FAIR_GROUP_SCHED
extern int sched_group_set_shares(struct task_group *tg, unsigned long shares);
extern unsigned long sched_group_shares(struct task_group *tg);
-
+#endif
+#ifdef CONFIG_RT_GROUP_SCHED
+extern int sched_group_set_rt_runtime(struct task_group *tg,
+ long rt_runtime_us);
+extern long sched_group_rt_runtime(struct task_group *tg);
+#endif
#endif
#ifdef CONFIG_TASK_XACCT
}
#endif
+/*
+ * migration_init() is declared as an external function when CONFIG_SMP is
+ * defined; without CONFIG_SMP it is an empty inline stub.
+ */
+#ifndef CONFIG_SMP
+static inline void migration_init(void) { }
+#else
+void migration_init(void);
+#endif
+
+/*
+ * Fallback used only when TASK_SIZE_OF is not already defined: the @tsk
+ * argument is ignored and the macro expands to TASK_SIZE.
+ */
+#if !defined(TASK_SIZE_OF)
+# define TASK_SIZE_OF(tsk)	TASK_SIZE
+#endif
+
#endif /* __KERNEL__ */
#endif