include/linux/cgroup.h

   1 #ifndef _LINUX_CGROUP_H
   2 #define _LINUX_CGROUP_H
   3 /*
   4  *  cgroup interface
   5  *
   6  *  Copyright (C) 2003 BULL SA
   7  *  Copyright (C) 2004-2006 Silicon Graphics, Inc.
   8  *
   9  */
  10
  11 #include <linux/sched.h>
  12 #include <linux/cpumask.h>
  13 #include <linux/nodemask.h>
  14 #include <linux/rcupdate.h>
  15 #include <linux/cgroupstats.h>
  16 #include <linux/prio_heap.h>
  17 #include <linux/rwsem.h>
  18
  19 #ifdef CONFIG_CGROUPS
  20 /* Forward declarations so the declarations below can refer to these types by pointer */
  21 struct cgroupfs_root;
  22 struct cgroup_subsys;
  23 struct inode;
  24 struct cgroup;
  25 /* NOTE(review): cgroup_lock_live_group() has no stub in the !CONFIG_CGROUPS branch */
  26 extern int cgroup_init_early(void);
  27 extern int cgroup_init(void);
  28 extern void cgroup_lock(void);
  29 extern bool cgroup_lock_live_group(struct cgroup *cgrp);
  30 extern void cgroup_unlock(void);
  31 extern void cgroup_fork(struct task_struct *p);
  32 extern void cgroup_fork_callbacks(struct task_struct *p);
  33 extern void cgroup_post_fork(struct task_struct *p);
  34 extern void cgroup_exit(struct task_struct *p, int run_callbacks);
  35 extern int cgroupstats_build(struct cgroupstats *stats,
  36                                 struct dentry *dentry);
  37
  38 extern struct file_operations proc_cgroup_operations;
  39
  40 /* Enumerate the subsystems: each SUBSYS(x) in cgroup_subsys.h becomes x_subsys_id */
  41 #define SUBSYS(_x) _x ## _subsys_id,
  42 enum cgroup_subsys_id {
  43 #include <linux/cgroup_subsys.h>
  44         CGROUP_SUBSYS_COUNT     /* last enumerator; sizes the subsys[] arrays */
  45 };
  46 #undef SUBSYS
  47
  48 /* Per-subsystem/per-cgroup state maintained by the system. */
  49 struct cgroup_subsys_state {
  50         /* The cgroup that this subsystem is attached to. Useful
  51          * for subsystems that want to know about the cgroup
  52          * hierarchy structure */
  53         struct cgroup *cgroup;
  54
  55         /* Reference count maintained by the cgroup system to allow
  56          * subsystems to be "busy". Should be accessed via css_get()
  57          * and css_put() */
  58
  59         atomic_t refcnt;
  60         /* Bit flags indexed by the CSS_* enum; css_get()/css_put() test CSS_ROOT */
  61         unsigned long flags;
  62 };
  63
  64 /* Bit numbers for the flags field of struct cgroup_subsys_state */
  65 enum {
  66         CSS_ROOT, /* This CSS is the subsystem's root; css_get()/css_put() skip it */
  67 };
  68
  69 /*
  70  * css_get() takes a reference on @css by incrementing css->refcnt;
  71  * it does nothing when the CSS_ROOT bit is set in css->flags.
  72  */
  73
  74 static inline void css_get(struct cgroup_subsys_state *css)
  75 {
  76         /* We don't need to reference count the root state */
  77         if (!test_bit(CSS_ROOT, &css->flags))
  78                 atomic_inc(&css->refcnt);
  79 }
  80 /*
  81  * css_put() releases a reference taken by css_get(). Non-root states
  82  * are handed to __css_put(); the CSS_ROOT state is left untouched.
  83  */
  84 /* Out-of-line helper called for non-root states; defined outside this header */
  85 extern void __css_put(struct cgroup_subsys_state *css);
  86 static inline void css_put(struct cgroup_subsys_state *css)
  87 {
  88         if (!test_bit(CSS_ROOT, &css->flags))
  89                 __css_put(css);
  90 }
  91
  92 /* Bit numbers for the flags field of struct cgroup (an unsigned long, for bitops) */
  93 enum {
  94         /* Control Group is dead */
  95         CGRP_REMOVED,
  96         /* Control Group has previously had a child cgroup or a task,
  97          * but no longer (only if CGRP_NOTIFY_ON_RELEASE is set) */
  98         CGRP_RELEASABLE,
  99         /* Control Group requires release notifications to userspace */
 100         CGRP_NOTIFY_ON_RELEASE,
 101 };
 102 /* One control group: a node in the parent/children tree carrying per-subsystem state */
 103 struct cgroup {
 104         unsigned long flags;            /* CGRP_* bit numbers; "unsigned long" so bitops work */
 105
 106         /* count users of this cgroup. >0 means busy, but doesn't
 107          * necessarily indicate the number of tasks in the
 108          * cgroup */
 109         atomic_t count;
 110
 111         /*
 112          * We link our 'sibling' struct into our parent's 'children'.
 113          * Our children link their 'sibling' into our 'children'.
 114          */
 115         struct list_head sibling;       /* my parent's children */
 116         struct list_head children;      /* my children */
 117
 118         struct cgroup *parent;  /* my parent */
 119         struct dentry *dentry;          /* cgroup fs entry, RCU protected */
 120
 121         /* Private pointers for each registered subsystem */
 122         struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
 123         /* NOTE(review): presumably the owning hierarchy and its top-level cgroup -- confirm */
 124         struct cgroupfs_root *root;
 125         struct cgroup *top_cgroup;
 126
 127         /*
 128          * List of cg_cgroup_links pointing at css_sets with
 129          * tasks in this cgroup. Protected by css_set_lock
 130          */
 131         struct list_head css_sets;
 132
 133         /*
 134          * Linked list running through all cgroups that can
 135          * potentially be reaped by the release agent. Protected by
 136          * release_list_lock
 137          */
 138         struct list_head release_list;
 139
 140         /* pids_mutex (an rw_semaphore, despite the name) protects the fields below */
 141         struct rw_semaphore pids_mutex;
 142         /* Array of process ids in the cgroup */
 143         pid_t *tasks_pids;
 144         /* How many files are using the current tasks_pids array */
 145         int pids_use_count;
 146         /* Length of the current tasks_pids array */
 147         int pids_length;
 148
 149         /* For RCU-protected deletion */
 150         struct rcu_head rcu_head;
 151 };
 152
 153 /* A css_set is a structure holding pointers to a set of
 154  * cgroup_subsys_state objects. This saves space in the task struct
 155  * object and speeds up fork()/exit(), since a single inc/dec and a
 156  * list_add()/del() can bump the reference count on the entire
 157  * cgroup set for a task.
 158  */
 159
 160 struct css_set {
 161
 162         /* Reference count */
 163         atomic_t refcount;
 164
 165         /*
 166          * List running through all css_sets in the same hash
 167          * slot. Protected by css_set_lock
 168          */
 169         struct hlist_node hlist;
 170
 171         /*
 172          * List running through all tasks using this css_set.
 173          * Protected by css_set_lock
 174          */
 175         struct list_head tasks;
 176
 177         /*
 178          * List of cg_cgroup_link objects on link chains from
 179          * cgroups referenced from this css_set. Protected by
 180          * css_set_lock
 181          */
 182         struct list_head cg_links;
 183
 184         /*
 185          * Set of subsystem states, one for each subsystem. This array
 186          * is immutable after creation apart from the init_css_set
 187          * during subsystem registration (at boot time).
 188          */
 189         struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT];
 190 };
 191
 192 /*
 193  * cgroup_map_cb is an abstract callback API for reporting map-valued
 194  * control files; a read_map() handler calls cb->fill() once per entry.
 195  */
 196
 197 struct cgroup_map_cb {
 198         int (*fill)(struct cgroup_map_cb *cb, const char *key, u64 value);
 199         void *state;    /* opaque pointer; its use is not visible in this header */
 200 };
 201
 202 /* struct cftype:
 203  *
 204  * The files in the cgroup filesystem mostly have very simple read/write
 205  * handling that a common function can take care of. Nevertheless some cases
 206  * (such as reading tasks) are special, so this structure describes the
 207  * handlers for every kind of file.
 208  *
 209  *
 210  * When reading/writing to a file:
 211  *      - the cgroup to use is file->f_dentry->d_parent->d_fsdata
 212  *      - the 'cftype' of the file is file->f_dentry->d_fsdata
 213  */
 214
 215 #define MAX_CFTYPE_NAME 64
 216 struct cftype {
 217         /* By convention, the name should begin with the name of the
 218          * subsystem, followed by a period */
 219         char name[MAX_CFTYPE_NAME];
 220         int private;    /* for the subsystem's own use, e.g. to multiplex trigger() */
 221
 222         /*
 223          * If non-zero, defines the maximum length of string that can
 224          * be passed to write_string; defaults to 64
 225          */
 226         size_t max_write_len;
 227
 228         int (*open)(struct inode *inode, struct file *file);
 229         ssize_t (*read)(struct cgroup *cgrp, struct cftype *cft,
 230                         struct file *file,
 231                         char __user *buf, size_t nbytes, loff_t *ppos);
 232         /*
 233          * read_u64() is a shortcut for the common case of returning a
 234          * single integer. Use it in place of read()
 235          */
 236         u64 (*read_u64)(struct cgroup *cgrp, struct cftype *cft);
 237         /*
 238          * read_s64() is a signed version of read_u64()
 239          */
 240         s64 (*read_s64)(struct cgroup *cgrp, struct cftype *cft);
 241         /*
 242          * read_map() is used for defining a map of key/value
 243          * pairs. It should call cb->fill(cb, key, value) for each
 244          * entry. The key/value pairs (and their ordering) should not
 245          * change between reboots.
 246          */
 247         int (*read_map)(struct cgroup *cont, struct cftype *cft,
 248                         struct cgroup_map_cb *cb);
 249         /*
 250          * read_seq_string() is used for outputting a simple sequence
 251          * using seqfile.
 252          */
 253         int (*read_seq_string)(struct cgroup *cont, struct cftype *cft,
 254                                struct seq_file *m);
 255
 256         ssize_t (*write)(struct cgroup *cgrp, struct cftype *cft,
 257                          struct file *file,
 258                          const char __user *buf, size_t nbytes, loff_t *ppos);
 259
 260         /*
 261          * write_u64() is a shortcut for the common case of accepting
 262          * a single integer (as parsed by simple_strtoull) from
 263          * userspace. Use in place of write(); return 0 or error.
 264          */
 265         int (*write_u64)(struct cgroup *cgrp, struct cftype *cft, u64 val);
 266         /*
 267          * write_s64() is a signed version of write_u64()
 268          */
 269         int (*write_s64)(struct cgroup *cgrp, struct cftype *cft, s64 val);
 270
 271         /*
 272          * write_string() is passed a nul-terminated kernelspace
 273          * buffer of maximum length determined by max_write_len.
 274          * Returns 0 or -ve error code.
 275          */
 276         int (*write_string)(struct cgroup *cgrp, struct cftype *cft,
 277                             const char *buffer);
 278         /*
 279          * trigger() callback can be used to get some kick from the
 280          * userspace, when the actual string written is not important
 281          * at all. The private field can be used to determine the
 282          * kick type for multiplexing.
 283          */
 284         int (*trigger)(struct cgroup *cgrp, unsigned int event);
 285
 286         int (*release)(struct inode *inode, struct file *file);
 287 };
 288 /* Passed to cgroup_scan_tasks(): test_task() runs under css_set_lock, process_task() does not */
 289 struct cgroup_scanner {
 290         struct cgroup *cg;
 291         int (*test_task)(struct task_struct *p, struct cgroup_scanner *scan);
 292         void (*process_task)(struct task_struct *p,
 293                         struct cgroup_scanner *scan);
 294         struct ptr_heap *heap;
 295 };
 296
 297 /* Add a new file to the given cgroup directory. Should only be
 298  * called by subsystems from within a populate() method */
 299 int cgroup_add_file(struct cgroup *cgrp, struct cgroup_subsys *subsys,
 300                        const struct cftype *cft);
 301
 302 /* Add a set of new files to the given cgroup directory. Should
 303  * only be called by subsystems from within a populate() method */
 304 int cgroup_add_files(struct cgroup *cgrp,
 305                         struct cgroup_subsys *subsys,
 306                         const struct cftype cft[],
 307                         int count);
 308
 309 int cgroup_is_removed(const struct cgroup *cgrp);
 310
 311 int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen);
 312
 313 int cgroup_task_count(const struct cgroup *cgrp);
 314
 315 /* Return true if the cgroup is a descendant of the current cgroup */
 316 int cgroup_is_descendant(const struct cgroup *cgrp);
 317
 318 /* Control Group subsystem type. See Documentation/cgroups.txt for details */
 319 /* Every callback below receives the subsystem itself (ss) as its first argument */
 320 struct cgroup_subsys {
 321         struct cgroup_subsys_state *(*create)(struct cgroup_subsys *ss,
 322                                                   struct cgroup *cgrp);
 323         void (*pre_destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
 324         void (*destroy)(struct cgroup_subsys *ss, struct cgroup *cgrp);
 325         int (*can_attach)(struct cgroup_subsys *ss,
 326                           struct cgroup *cgrp, struct task_struct *tsk);
 327         void (*attach)(struct cgroup_subsys *ss, struct cgroup *cgrp,
 328                         struct cgroup *old_cgrp, struct task_struct *tsk);
 329         void (*fork)(struct cgroup_subsys *ss, struct task_struct *task);
 330         void (*exit)(struct cgroup_subsys *ss, struct task_struct *task);
 331         int (*populate)(struct cgroup_subsys *ss,
 332                         struct cgroup *cgrp);
 333         void (*post_clone)(struct cgroup_subsys *ss, struct cgroup *cgrp);
 334         void (*bind)(struct cgroup_subsys *ss, struct cgroup *root);
 335
 336         int subsys_id;
 337         int active;
 338         int disabled;
 339         int early_init;
 340 #define MAX_CGROUP_TYPE_NAMELEN 32
 341         const char *name;
 342
 343         struct cgroupfs_root *root;
 344
 345         struct list_head sibling;
 346 };
 347 /* Declare one "extern struct cgroup_subsys x_subsys;" per SUBSYS(x) in cgroup_subsys.h */
 348 #define SUBSYS(_x) extern struct cgroup_subsys _x ## _subsys;
 349 #include <linux/cgroup_subsys.h>
 350 #undef SUBSYS
 351 /* Return @cgrp's state pointer for @subsys_id; the index is not range-checked here */
 352 static inline struct cgroup_subsys_state *cgroup_subsys_state(
 353         struct cgroup *cgrp, int subsys_id)
 354 {
 355         return cgrp->subsys[subsys_id];
 356 }
 357 /* Look up @task's state for @subsys_id via task->cgroups; the load uses rcu_dereference() */
 358 static inline struct cgroup_subsys_state *task_subsys_state(
 359         struct task_struct *task, int subsys_id)
 360 {
 361         return rcu_dereference(task->cgroups->subsys[subsys_id]);
 362 }
 363 /* Return the cgroup recorded in @task's state for @subsys_id (via task_subsys_state()) */
 364 static inline struct cgroup* task_cgroup(struct task_struct *task,
 365                                                int subsys_id)
 366 {
 367         return task_subsys_state(task, subsys_id)->cgroup;
 368 }
 369
 370 int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *ss,
 371                                                         char *nodename);
 372
 373 /* Opaque cursor handed to cgroup_iter_start()/_next()/_end(); do not touch its fields */
 374 struct cgroup_iter {
 375         struct list_head *cg_link;
 376         struct list_head *task;
 377 };
 378
 379 /* To iterate across the tasks in a cgroup:
 380  *
 381  * 1) call cgroup_iter_start() to initialize an iterator
 382  *
 383  * 2) call cgroup_iter_next() to retrieve member tasks until it
 384  *    returns NULL or until you want to end the iteration
 385  *
 386  * 3) call cgroup_iter_end() to destroy the iterator.
 387  *
 388  * Or, call cgroup_scan_tasks() to iterate through every task in a cgroup.
 389  *    - cgroup_scan_tasks() holds the css_set_lock when calling the test_task()
 390  *      callback, but not while calling the process_task() callback.
 391  */
 392 void cgroup_iter_start(struct cgroup *cgrp, struct cgroup_iter *it);
 393 struct task_struct *cgroup_iter_next(struct cgroup *cgrp,
 394                                         struct cgroup_iter *it);
 395 void cgroup_iter_end(struct cgroup *cgrp, struct cgroup_iter *it);
 396 int cgroup_scan_tasks(struct cgroup_scanner *scan);
 397 int cgroup_attach_task(struct cgroup *, struct task_struct *);
 398
 399 #else /* !CONFIG_CGROUPS */
 400 /* Stubs used when CONFIG_CGROUPS is off: the hooks do nothing, the init calls return 0 */
 401 static inline int cgroup_init_early(void) { return 0; }
 402 static inline int cgroup_init(void) { return 0; }
 403 static inline void cgroup_fork(struct task_struct *p) {}
 404 static inline void cgroup_fork_callbacks(struct task_struct *p) {}
 405 static inline void cgroup_post_fork(struct task_struct *p) {}
 406 static inline void cgroup_exit(struct task_struct *p, int callbacks) {}
 407
 408 static inline void cgroup_lock(void) {}
 409 static inline void cgroup_unlock(void) {}
 410 static inline int cgroupstats_build(struct cgroupstats *stats,
 411                                         struct dentry *dentry)
 412 {
 413         return -EINVAL; /* always fails in this configuration */
 414 }
 415
 416 #endif /* !CONFIG_CGROUPS */
 417
 418 #endif /* _LINUX_CGROUP_H */