#include <linux/spinlock.h>
#include <linux/fs.h>
+#include <asm/uaccess.h>
+
struct cgroup_subsys mem_cgroup_subsys;
static const int MEM_CGROUP_RECLAIM_RETRIES = 5;
* spin_lock to protect the per cgroup LRU
*/
spinlock_t lru_lock;
+ unsigned long control_type; /* control RSS or RSS+Pagecache */
};
/*
/* mapped and cached states */
};
+/*
+ * Values accepted by the "control_type" file.  The numbers are what
+ * userspace writes and reads back, so they are spelled out explicitly.
+ */
+enum {
+	/* Lower sentinel: mem_control_type_write() rejects values <= this. */
+	MEM_CGROUP_TYPE_UNSPEC = 0,
+	/* NOTE(review): presumably "account mapped (RSS) pages only" - confirm. */
+	MEM_CGROUP_TYPE_MAPPED = 1,
+	/* NOTE(review): presumably "account page cache only" - confirm. */
+	MEM_CGROUP_TYPE_CACHED = 2,
+	/* mem_cgroup_cache_charge() charges page cache only in this mode. */
+	MEM_CGROUP_TYPE_ALL = 3,
+	/* Upper sentinel: mem_control_type_write() rejects values >= this. */
+	MEM_CGROUP_TYPE_MAX = 4,
+};
+
+static struct mem_cgroup init_mem_cgroup;
static inline
struct mem_cgroup *mem_cgroup_from_cont(struct cgroup *cont)
(page->page_cgroup & ~PAGE_CGROUP_LOCK);
}
-void __always_inline lock_page_cgroup(struct page *page)
+static void __always_inline lock_page_cgroup(struct page *page)
{
/*
 * Take the bit spinlock kept in bit PAGE_CGROUP_LOCK_BIT of
 * page->page_cgroup.  Paired with unlock_page_cgroup().
 */
bit_spin_lock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
/* Sanity check: the page must now report itself as locked. */
VM_BUG_ON(!page_cgroup_locked(page));
}
-void __always_inline unlock_page_cgroup(struct page *page)
+static void __always_inline unlock_page_cgroup(struct page *page)
{
/*
 * Release the bit spinlock in bit PAGE_CGROUP_LOCK_BIT of
 * page->page_cgroup, i.e. the one lock_page_cgroup() acquires.
 */
bit_spin_unlock(PAGE_CGROUP_LOCK_BIT, &page->page_cgroup);
}
-void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
+static void __mem_cgroup_move_lists(struct page_cgroup *pc, bool active)
{
	/*
	 * Move @pc onto exactly one of its cgroup's LRU lists: the active
	 * list when @active is true, the inactive list otherwise.  The two
	 * moves must be mutually exclusive; without the else an active page
	 * would be moved to active_list and then straight on to
	 * inactive_list.
	 */
	if (active)
		list_move(&pc->lru, &pc->mem_cgroup->active_list);
	else
		list_move(&pc->lru, &pc->mem_cgroup->inactive_list);
}
+/*
+ * Report whether @task's mm belongs to @mem.
+ *
+ * The check runs under task_lock(@task).  Returns 1 when @task has an mm
+ * and mm_cgroup() of that mm is @mem, 0 otherwise (including when @task
+ * has no mm).
+ */
+int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
+{
+	int match = 0;
+
+	task_lock(task);
+	if (task->mm)
+		match = (mm_cgroup(task->mm) == mem);
+	task_unlock(task);
+
+	return match;
+}
+
/*
* This routine assumes that the appropriate zone's lru lock is already held
*/
* 0 if the charge was successful
* < 0 if the cgroup is over its limit
*/
-int mem_cgroup_charge(struct page *page, struct mm_struct *mm)
+int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
+ gfp_t gfp_mask)
{
struct mem_cgroup *mem;
struct page_cgroup *pc, *race_pc;
unlock_page_cgroup(page);
- pc = kzalloc(sizeof(struct page_cgroup), GFP_KERNEL);
+ pc = kzalloc(sizeof(struct page_cgroup), gfp_mask);
if (pc == NULL)
goto err;
* If we created the page_cgroup, we should free it on exceeding
* the cgroup limit.
*/
- while (res_counter_charge(&mem->res, 1)) {
- if (try_to_free_mem_cgroup_pages(mem))
+ while (res_counter_charge(&mem->res, PAGE_SIZE)) {
+ bool is_atomic = gfp_mask & GFP_ATOMIC;
+ /*
+ * We cannot reclaim under GFP_ATOMIC, fail the charge
+ */
+ if (is_atomic)
+ goto noreclaim;
+
+ if (try_to_free_mem_cgroup_pages(mem, gfp_mask))
continue;
/*
congestion_wait(WRITE, HZ/10);
continue;
}
-
+noreclaim:
css_put(&mem->css);
+ if (!is_atomic)
+ mem_cgroup_out_of_memory(mem, GFP_KERNEL);
goto free_pc;
}
kfree(pc);
pc = race_pc;
atomic_inc(&pc->ref_cnt);
- res_counter_uncharge(&mem->res, 1);
+ res_counter_uncharge(&mem->res, PAGE_SIZE);
css_put(&mem->css);
goto done;
}
}
/*
+ * Charge a page cache page, but only if the cgroup that @mm belongs to
+ * accounts page cache (control_type == MEM_CGROUP_TYPE_ALL).
+ *
+ * A NULL @mm is treated as init_mm.  Returns 0 when the page is
+ * deliberately left uncharged, otherwise whatever mem_cgroup_charge()
+ * returns for @page, @mm and @gfp_mask.
+ */
+int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
+				gfp_t gfp_mask)
+{
+	bool charge_cache;
+
+	if (!mm)
+		mm = &init_mm;
+
+	/*
+	 * rcu_dereference() is only valid inside an RCU read-side critical
+	 * section.  Sample control_type there and leave the section before
+	 * charging: mem_cgroup_charge() allocates with @gfp_mask and may
+	 * therefore block, which is not allowed under rcu_read_lock().
+	 */
+	rcu_read_lock();
+	charge_cache = rcu_dereference(mm->mem_cgroup)->control_type ==
+			MEM_CGROUP_TYPE_ALL;
+	rcu_read_unlock();
+
+	if (!charge_cache)
+		return 0;
+
+	return mem_cgroup_charge(page, mm, gfp_mask);
+}
+
+/*
* Uncharging is always a welcome operation, we never complain, simply
* uncharge.
*/
struct page *page;
unsigned long flags;
+ /*
+ * A NULL pc means the page was never charged; this can happen when
+ * control_type is switched between the charge and the uncharge.
+ */
if (!pc)
return;
css_put(&mem->css);
page_assign_page_cgroup(page, NULL);
unlock_page_cgroup(page);
- res_counter_uncharge(&mem->res, 1);
+ res_counter_uncharge(&mem->res, PAGE_SIZE);
spin_lock_irqsave(&mem->lru_lock, flags);
list_del_init(&pc->lru);
}
}
-static ssize_t mem_cgroup_read(struct cgroup *cont, struct cftype *cft,
- struct file *file, char __user *userbuf, size_t nbytes,
- loff_t *ppos)
+/*
+ * Write strategy handed to res_counter_write() by mem_cgroup_write().
+ *
+ * @buf is parsed with memparse(); anything left over after the number
+ * makes the whole string invalid.  On success *@tmp holds the value
+ * rounded up to a multiple of PAGE_SIZE.
+ *
+ * Returns 0 on success, -EINVAL if @buf has trailing characters that
+ * memparse() did not consume.
+ */
+int mem_cgroup_write_strategy(char *buf, unsigned long long *tmp)
+{
+	const unsigned long long page_mask =
+			~((unsigned long long)PAGE_SIZE - 1);
+
+	*tmp = memparse(buf, &buf);
+	if (*buf != '\0')
+		return -EINVAL;
+
+	/*
+	 * Round up to a whole number of pages.  A value within one page of
+	 * the type's maximum cannot be rounded up without wrapping around
+	 * to 0 (turning "as large as possible" into "nothing"), so such
+	 * values saturate at the largest page-aligned value instead.
+	 */
+	if (*tmp > page_mask)
+		*tmp = page_mask;
+	else
+		*tmp = (*tmp + PAGE_SIZE - 1) & page_mask;
+
+	return 0;
+}
+
+static ssize_t mem_cgroup_read(struct cgroup *cont,
+ struct cftype *cft, struct file *file,
+ char __user *userbuf, size_t nbytes, loff_t *ppos)
{
/*
 * Read handler shared by the res_counter-backed control files.
 * cft->private (RES_USAGE, RES_LIMIT, RES_FAILCNT in mem_cgroup_files)
 * selects which member of this cgroup's res_counter is reported.
 * NOTE(review): the trailing NULL sits where mem_cgroup_write() passes
 * a strategy callback; presumably NULL selects res_counter_read()'s
 * default formatting - confirm against res_counter_read().
 */
return res_counter_read(&mem_cgroup_from_cont(cont)->res,
- cft->private, userbuf, nbytes, ppos);
+ cft->private, userbuf, nbytes, ppos,
+ NULL);
}
static ssize_t mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
size_t nbytes, loff_t *ppos)
{
return res_counter_write(&mem_cgroup_from_cont(cont)->res,
- cft->private, userbuf, nbytes, ppos);
+ cft->private, userbuf, nbytes, ppos,
+ mem_cgroup_write_strategy);
+}
+
+/*
+ * Write handler for the "control_type" file.
+ *
+ * Expects a decimal number strictly between MEM_CGROUP_TYPE_UNSPEC and
+ * MEM_CGROUP_TYPE_MAX, optionally followed by a single newline so that
+ * a plain "echo N > control_type" works.  On success the value is stored
+ * in the cgroup's control_type.
+ *
+ * Returns @nbytes on success, -ENOMEM if the temporary buffer cannot be
+ * allocated, -EFAULT if @userbuf cannot be read, and -EINVAL for
+ * malformed or out-of-range input.
+ */
+static ssize_t mem_control_type_write(struct cgroup *cont,
+				struct cftype *cft, struct file *file,
+				const char __user *userbuf,
+				size_t nbytes, loff_t *pos)
+{
+	ssize_t ret;
+	char *buf, *end;
+	unsigned long tmp;
+	struct mem_cgroup *mem;
+
+	mem = mem_cgroup_from_cont(cont);
+	buf = kmalloc(nbytes + 1, GFP_KERNEL);
+	ret = -ENOMEM;
+	if (buf == NULL)
+		goto out;
+
+	/* NUL-terminate so the parser below cannot run off the buffer. */
+	buf[nbytes] = 0;
+	ret = -EFAULT;
+	if (copy_from_user(buf, userbuf, nbytes))
+		goto out_free;
+
+	ret = -EINVAL;
+	tmp = simple_strtoul(buf, &end, 10);
+	/* No digits at all. */
+	if (end == buf)
+		goto out_free;
+	/* Tolerate the one trailing newline that "echo" appends. */
+	if (*end == '\n')
+		end++;
+	if (*end != '\0')
+		goto out_free;
+
+	/* UNSPEC and MAX are sentinels, not selectable modes. */
+	if (tmp <= MEM_CGROUP_TYPE_UNSPEC || tmp >= MEM_CGROUP_TYPE_MAX)
+		goto out_free;
+
+	mem->control_type = tmp;
+	ret = nbytes;
+out_free:
+	kfree(buf);
+out:
+	return ret;
+}
+
+/*
+ * Read handler for the "control_type" file: formats the cgroup's current
+ * control_type as an unsigned decimal followed by a newline and hands it
+ * to simple_read_from_buffer(), whose result is returned.
+ */
+static ssize_t mem_control_type_read(struct cgroup *cont,
+				struct cftype *cft,
+				struct file *file, char __user *userbuf,
+				size_t nbytes, loff_t *ppos)
+{
+	char text[64];
+	int len;
+
+	len = sprintf(text, "%lu\n", mem_cgroup_from_cont(cont)->control_type);
+
+	return simple_read_from_buffer((void __user *)userbuf, nbytes,
+					ppos, text, len);
}
static struct cftype mem_cgroup_files[] = {
{
- .name = "usage",
+ .name = "usage_in_bytes",
.private = RES_USAGE,
.read = mem_cgroup_read,
},
{
- .name = "limit",
+ .name = "limit_in_bytes",
.private = RES_LIMIT,
.write = mem_cgroup_write,
.read = mem_cgroup_read,
.private = RES_FAILCNT,
.read = mem_cgroup_read,
},
+ {
+ .name = "control_type",
+ .write = mem_control_type_write,
+ .read = mem_control_type_read,
+ },
};
static struct mem_cgroup init_mem_cgroup;
INIT_LIST_HEAD(&mem->active_list);
INIT_LIST_HEAD(&mem->inactive_list);
spin_lock_init(&mem->lru_lock);
+ mem->control_type = MEM_CGROUP_TYPE_ALL;
return &mem->css;
}