kernel: cpuset: Make cpusets restore on hotplug
Riley Andrews [Fri, 12 Jun 2015 21:36:28 +0000 (14:36 -0700)]
This deliberately changes the behavior of the per-cpuset cpus file
so that it is no longer affected by hotplug. When a cpu is offlined,
it is removed from the cpuset's effective cpu mask; when a cpu is
onlined, it is restored to any cpuset that originally requested it.
The cpus files still have to be hierarchical, but the ranges no
longer have to be a subset of the currently online cpus, only of the
physically present cpus.
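
A minimal userspace sketch of the intended behavior. The paths assume
an Android-style cpuset hierarchy mounted at /dev/cpuset (noprefix)
with an existing child group "demo", a hot-unpluggable cpu2, and root
privileges; all of these are illustrative assumptions, not part of
this patch:

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    static void write_str(const char *path, const char *s)
    {
            int fd = open(path, O_WRONLY);

            if (fd < 0) {
                    perror(path);
                    return;
            }
            if (write(fd, s, strlen(s)) < 0)
                    perror(path);
            close(fd);
    }

    static void read_str(const char *path)
    {
            char buf[64] = "";
            int fd = open(path, O_RDONLY);
            ssize_t n;

            if (fd < 0) {
                    perror(path);
                    return;
            }
            n = read(fd, buf, sizeof(buf) - 1);
            if (n > 0)
                    buf[n] = '\0';
            close(fd);
            printf("%s: %s", path, buf);
    }

    int main(void)
    {
            write_str("/dev/cpuset/demo/cpus", "0-3");
            write_str("/sys/devices/system/cpu/cpu2/online", "0"); /* offline cpu2 */
            read_str("/dev/cpuset/demo/cpus");  /* still prints "0-3" */
            write_str("/sys/devices/system/cpu/cpu2/online", "1"); /* bring cpu2 back */
            read_str("/dev/cpuset/demo/cpus");  /* "0-3"; tasks may use cpu2 again */
            return 0;
    }

Previously the second read would have reported the reduced mask
(e.g. "0-1,3"), and cpu2 would not have returned to the cpuset after
coming back online.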

Bug 1815239

Change-Id: I3efbae24a1f6384be1e603fb56f0d3baef61d924
Signed-off-by: Christopher Freeman <cfreeman@nvidia.com>
Reviewed-on: http://git-master/r/1231559
(cherry picked from commit 2b02e3175e322505383c4c2cb123de34294e77e5 in
rel-24-jetson-vr)
Reviewed-on: http://git-master/r/1235405
Reviewed-by: Maneet Singh <mmaneetsingh@nvidia.com>
GVS: Gerrit_Virtual_Submit
Reviewed-by: Raymond Poudrier <rapoudrier@nvidia.com>

kernel/cpuset.c

index 9a12f5b..ad0d273 100644
@@ -85,6 +85,7 @@ struct cpuset {
 
        unsigned long flags;            /* "unsigned long" so bitops work */
        cpumask_var_t cpus_allowed;     /* CPUs allowed to tasks in cpuset */
+       cpumask_var_t cpus_requested;   /* CPUs requested; some may be offline due to hotplug */
        nodemask_t mems_allowed;        /* Memory Nodes allowed to tasks */
 
        struct fmeter fmeter;           /* memory_pressure filter */
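
The new field separates what the user asked for from what is
currently usable: cpus_allowed is always cpus_requested restricted to
the online set. A toy userspace model of that invariant, with
uint64_t standing in for cpumask_t (names and values are
illustrative):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
            uint64_t requested = 0x0F;          /* user wrote "0-3"        */
            uint64_t active    = 0x0B;          /* cpu2 currently offline  */
            uint64_t allowed   = requested & active;

            assert(allowed == 0x0B);            /* tasks run on cpus 0,1,3 */

            active |= 1ULL << 2;                /* cpu2 comes back online  */
            allowed = requested & active;
            assert(allowed == 0x0F);            /* cpu2 restored           */
            return 0;
    }
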
@@ -381,7 +382,7 @@ static void cpuset_update_task_spread_flag(struct cpuset *cs,
 
 static int is_cpuset_subset(const struct cpuset *p, const struct cpuset *q)
 {
-       return  cpumask_subset(p->cpus_allowed, q->cpus_allowed) &&
+       return  cpumask_subset(p->cpus_requested, q->cpus_requested) &&
                nodes_subset(p->mems_allowed, q->mems_allowed) &&
                is_cpu_exclusive(p) <= is_cpu_exclusive(q) &&
                is_mem_exclusive(p) <= is_mem_exclusive(q);
@@ -472,7 +473,7 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
        cpuset_for_each_child(c, cont, par) {
                if ((is_cpu_exclusive(trial) || is_cpu_exclusive(c)) &&
                    c != cur &&
-                   cpumask_intersects(trial->cpus_allowed, c->cpus_allowed))
+                   cpumask_intersects(trial->cpus_requested, c->cpus_requested))
                        goto out;
                if ((is_mem_exclusive(trial) || is_mem_exclusive(c)) &&
                    c != cur &&
@@ -881,19 +882,21 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
        if (!*buf) {
                cpumask_clear(trialcs->cpus_allowed);
        } else {
-               retval = cpulist_parse(buf, trialcs->cpus_allowed);
+               retval = cpulist_parse(buf, trialcs->cpus_requested);
                if (retval < 0)
                        return retval;
 
-               if (!cpumask_subset(trialcs->cpus_allowed, cpu_active_mask))
+               if (!cpumask_subset(trialcs->cpus_requested, cpu_present_mask))
                        return -EINVAL;
+
+               cpumask_and(trialcs->cpus_allowed, trialcs->cpus_requested, cpu_active_mask);
        }
        retval = validate_change(cs, trialcs);
        if (retval < 0)
                return retval;
 
        /* Nothing to do if the cpus didn't change */
-       if (cpumask_equal(cs->cpus_allowed, trialcs->cpus_allowed))
+       if (cpumask_equal(cs->cpus_requested, trialcs->cpus_requested))
                return 0;
 
        retval = heap_init(&heap, PAGE_SIZE, GFP_KERNEL, NULL);
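
In short, a write to the cpus file is now validated against the
present mask rather than the active mask, and the effective mask is
derived by intersecting with the online cpus. A toy model of those
two checks (function and variable names are illustrative):

    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Toy model of the reworked update_cpumask() checks. */
    static int update_cpumask_model(uint64_t requested, uint64_t present,
                                    uint64_t active, uint64_t *allowed)
    {
            if (requested & ~present)           /* cpu not physically there */
                    return -EINVAL;
            *allowed = requested & active;      /* usable subset right now  */
            return 0;
    }

    int main(void)
    {
            uint64_t allowed = 0;

            /* present: cpus 0-7, active: cpus 0-3 (4-7 hotplugged out) */
            printf("%d\n", update_cpumask_model(0x30F, 0xFF, 0x0F, &allowed));
            /* -22 (-EINVAL): cpus 8-9 are not present at all */
            printf("%d\n", update_cpumask_model(0xF0, 0xFF, 0x0F, &allowed));
            /* 0: accepted; allowed stays empty until cpus 4-7 come online */
            return 0;
    }

A request covering present-but-offline cpus is thus no longer
rejected with -EINVAL, as it would have been under the old
cpu_active_mask check.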
@@ -904,6 +907,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
 
        mutex_lock(&callback_mutex);
        cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
+       cpumask_copy(cs->cpus_requested, trialcs->cpus_requested);
        mutex_unlock(&callback_mutex);
 
        /*
@@ -1670,7 +1674,7 @@ static size_t cpuset_sprintf_cpulist(char *page, struct cpuset *cs)
        size_t count;
 
        mutex_lock(&callback_mutex);
-       count = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed);
+       count = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_requested);
        mutex_unlock(&callback_mutex);
 
        return count;
@@ -1881,19 +1885,26 @@ static struct cgroup_subsys_state *cpuset_css_alloc(struct cgroup *cont)
        cs = kzalloc(sizeof(*cs), GFP_KERNEL);
        if (!cs)
                return ERR_PTR(-ENOMEM);
-       if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL)) {
-               kfree(cs);
-               return ERR_PTR(-ENOMEM);
-       }
+       if (!alloc_cpumask_var(&cs->cpus_allowed, GFP_KERNEL))
+               goto error_allowed;
+       if (!alloc_cpumask_var(&cs->cpus_requested, GFP_KERNEL))
+               goto error_requested;
 
        set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
        cpumask_clear(cs->cpus_allowed);
+       cpumask_clear(cs->cpus_requested);
        nodes_clear(cs->mems_allowed);
        fmeter_init(&cs->fmeter);
        INIT_WORK(&cs->hotplug_work, cpuset_propagate_hotplug_workfn);
        cs->relax_domain_level = -1;
 
        return &cs->css;
+
+error_requested:
+       free_cpumask_var(cs->cpus_allowed);
+error_allowed:
+       kfree(cs);
+       return ERR_PTR(-ENOMEM);
 }
 
 static int cpuset_css_online(struct cgroup *cgrp)
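
The allocation path now unwinds with the kernel's usual goto-on-error
idiom, freeing exactly what was allocated before the failing step. A
self-contained userspace sketch of the same pattern (the struct and
names are illustrative, not kernel API):

    #include <stdlib.h>

    struct pair { int *a, *b; };

    /* Sketch of the goto-unwind idiom used in cpuset_css_alloc(). */
    static struct pair *pair_alloc(void)
    {
            struct pair *p = calloc(1, sizeof(*p));

            if (!p)
                    return NULL;
            p->a = malloc(sizeof(*p->a));
            if (!p->a)
                    goto error_a;
            p->b = malloc(sizeof(*p->b));
            if (!p->b)
                    goto error_b;
            return p;

    error_b:
            free(p->a);
    error_a:
            free(p);
            return NULL;
    }

    int main(void)
    {
            struct pair *p = pair_alloc();

            if (p) {
                    free(p->a);
                    free(p->b);
                    free(p);
            }
            return 0;
    }
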
@@ -1944,6 +1955,7 @@ static int cpuset_css_online(struct cgroup *cgrp)
        mutex_lock(&callback_mutex);
        cs->mems_allowed = parent->mems_allowed;
        cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
+       cpumask_copy(cs->cpus_requested, parent->cpus_requested);
        mutex_unlock(&callback_mutex);
 out_unlock:
        mutex_unlock(&cpuset_mutex);
@@ -1976,6 +1988,7 @@ static void cpuset_css_free(struct cgroup *cont)
        struct cpuset *cs = cgroup_cs(cont);
 
        free_cpumask_var(cs->cpus_allowed);
+       free_cpumask_var(cs->cpus_requested);
        kfree(cs);
 }
 
@@ -2006,8 +2019,11 @@ int __init cpuset_init(void)
 
        if (!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL))
                BUG();
+       if (!alloc_cpumask_var(&top_cpuset.cpus_requested, GFP_KERNEL))
+               BUG();
 
        cpumask_setall(top_cpuset.cpus_allowed);
+       cpumask_setall(top_cpuset.cpus_requested);
        nodes_setall(top_cpuset.mems_allowed);
 
        fmeter_init(&top_cpuset.fmeter);
@@ -2063,20 +2079,22 @@ static void remove_tasks_in_empty_cpuset(struct cpuset *cs)
  */
 static void cpuset_propagate_hotplug_workfn(struct work_struct *work)
 {
-       static cpumask_t off_cpus;
+       static cpumask_t diff, new_allowed;
        static nodemask_t off_mems, tmp_mems;
        struct cpuset *cs = container_of(work, struct cpuset, hotplug_work);
        bool is_empty;
 
        mutex_lock(&cpuset_mutex);
 
-       cpumask_andnot(&off_cpus, cs->cpus_allowed, top_cpuset.cpus_allowed);
+       cpumask_and(&new_allowed, cs->cpus_requested, top_cpuset.cpus_allowed);
+       cpumask_xor(&diff, &new_allowed, cs->cpus_allowed);
+
        nodes_andnot(off_mems, cs->mems_allowed, top_cpuset.mems_allowed);
 
        /* remove offline cpus from @cs */
-       if (!cpumask_empty(&off_cpus)) {
+       if (!cpumask_empty(&diff)) {
                mutex_lock(&callback_mutex);
-               cpumask_andnot(cs->cpus_allowed, cs->cpus_allowed, &off_cpus);
+               cpumask_copy(cs->cpus_allowed, &new_allowed);
                mutex_unlock(&callback_mutex);
                update_tasks_cpumask(cs, NULL);
        }
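
Recomputing the allowed set from cpus_requested on every event means
a single code path handles both offline (bits drop out of
top_cpuset.cpus_allowed) and online (bits reappear). A toy model of
the mask arithmetic, again with uint64_t standing in for cpumask_t:

    #include <stdint.h>
    #include <stdio.h>

    /* Toy model of the masks in cpuset_propagate_hotplug_workfn(). */
    static uint64_t propagate(uint64_t requested, uint64_t top_allowed,
                              uint64_t cur_allowed)
    {
            uint64_t new_allowed = requested & top_allowed;
            uint64_t diff = new_allowed ^ cur_allowed;  /* cpus lost or regained */

            if (diff)                   /* only then touch tasks' masks */
                    cur_allowed = new_allowed;
            return cur_allowed;
    }

    int main(void)
    {
            uint64_t requested = 0x0F, allowed = 0x0F;

            allowed = propagate(requested, 0x0B, allowed);  /* cpu2 offlined */
            printf("after offline: %#llx\n", (unsigned long long)allowed); /* 0xb */
            allowed = propagate(requested, 0x0F, allowed);  /* cpu2 onlined  */
            printf("after online:  %#llx\n", (unsigned long long)allowed); /* 0xf */
            return 0;
    }
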
@@ -2187,7 +2205,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
        }
 
        /* if cpus or mems went down, we need to propagate to descendants */
-       if (cpus_offlined || mems_offlined) {
+       if (cpus_updated || mems_updated) {
                struct cpuset *cs;
                struct cgroup *pos_cgrp;