Merge git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-cpumask
Linus Torvalds [Sun, 5 Apr 2009 17:33:07 +0000 (10:33 -0700)]
* git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux-2.6-cpumask: (36 commits)
  cpumask: remove cpumask allocation from idle_balance, fix
  numa, cpumask: move numa_node_id default implementation to topology.h, fix
  cpumask: remove cpumask allocation from idle_balance
  x86: cpumask: x86 mmio-mod.c use cpumask_var_t for downed_cpus
  x86: cpumask: update 32-bit APM not to mug current->cpus_allowed
  x86: microcode: cleanup
  x86: cpumask: use work_on_cpu in arch/x86/kernel/microcode_core.c
  cpumask: fix CONFIG_CPUMASK_OFFSTACK=y cpu hotunplug crash
  numa, cpumask: move numa_node_id default implementation to topology.h
  cpumask: convert node_to_cpumask_map[] to cpumask_var_t
  cpumask: remove x86 cpumask_t uses.
  cpumask: use cpumask_var_t in uv_flush_tlb_others.
  cpumask: remove cpumask_t assignment from vector_allocation_domain()
  cpumask: make Xen use the new operators.
  cpumask: clean up summit's send_IPI functions
  cpumask: use new cpumask functions throughout x86
  x86: unify cpu_callin_mask/cpu_callout_mask/cpu_initialized_mask/cpu_sibling_setup_mask
  cpumask: convert struct cpuinfo_x86's llc_shared_map to cpumask_var_t
  cpumask: convert node_to_cpumask_map[] to cpumask_var_t
  x86: unify 32 and 64-bit node_to_cpumask_map
  ...

1  2 
arch/x86/include/asm/pci.h
drivers/pci/pci-driver.c
include/linux/mmzone.h
kernel/sched.c
mm/page_alloc.c
mm/slab.c
mm/vmscan.c
net/sunrpc/svc.c

@@@ -86,43 -86,12 +86,43 @@@ static inline void early_quirks(void) 
  
  extern void pci_iommu_alloc(void);
  
 -#endif  /* __KERNEL__ */
 +/* MSI arch hook */
 +#define arch_setup_msi_irqs arch_setup_msi_irqs
 +
 +#define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys)
 +
 +#if defined(CONFIG_X86_64) || defined(CONFIG_DMA_API_DEBUG)
 +
 +#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)       \
 +              dma_addr_t ADDR_NAME;
 +#define DECLARE_PCI_UNMAP_LEN(LEN_NAME)         \
 +              __u32 LEN_NAME;
 +#define pci_unmap_addr(PTR, ADDR_NAME)                  \
 +              ((PTR)->ADDR_NAME)
 +#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL)         \
 +              (((PTR)->ADDR_NAME) = (VAL))
 +#define pci_unmap_len(PTR, LEN_NAME)                    \
 +              ((PTR)->LEN_NAME)
 +#define pci_unmap_len_set(PTR, LEN_NAME, VAL)           \
 +              (((PTR)->LEN_NAME) = (VAL))
  
 -#ifdef CONFIG_X86_32
 -# include "pci_32.h"
  #else
 -# include "pci_64.h"
 +
 +#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME)       dma_addr_t ADDR_NAME[0];
 +#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) unsigned LEN_NAME[0];
 +#define pci_unmap_addr(PTR, ADDR_NAME)  sizeof((PTR)->ADDR_NAME)
 +#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \
 +              do { break; } while (pci_unmap_addr(PTR, ADDR_NAME))
 +#define pci_unmap_len(PTR, LEN_NAME)            sizeof((PTR)->LEN_NAME)
 +#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \
 +              do { break; } while (pci_unmap_len(PTR, LEN_NAME))
 +
 +#endif
 +
 +#endif  /* __KERNEL__ */
 +
 +#ifdef CONFIG_X86_64
 +#include "pci_64.h"
  #endif
  
  /* implement the pci_ DMA API in terms of the generic device dma_ one */
@@@ -140,11 -109,6 +140,6 @@@ static inline int __pcibus_to_node(cons
        return sd->node;
  }
  
- static inline cpumask_t __pcibus_to_cpumask(struct pci_bus *bus)
- {
-       return node_to_cpumask(__pcibus_to_node(bus));
- }
  static inline const struct cpumask *
  cpumask_of_pcibus(const struct pci_bus *bus)
  {
diff --combined drivers/pci/pci-driver.c
@@@ -99,52 -99,6 +99,52 @@@ store_new_id(struct device_driver *driv
  }
  static DRIVER_ATTR(new_id, S_IWUSR, NULL, store_new_id);
  
 +/**
 + * store_remove_id - remove a PCI device ID from this driver
 + * @driver: target device driver
 + * @buf: buffer for scanning device ID data
 + * @count: input size
 + *
 + * Removes a dynamic PCI device ID from this driver.
 + */
 +static ssize_t
 +store_remove_id(struct device_driver *driver, const char *buf, size_t count)
 +{
 +      struct pci_dynid *dynid, *n;
 +      struct pci_driver *pdrv = to_pci_driver(driver);
 +      __u32 vendor, device, subvendor = PCI_ANY_ID,
 +              subdevice = PCI_ANY_ID, class = 0, class_mask = 0;
 +      int fields = 0;
 +      int retval = -ENODEV;
 +
 +      fields = sscanf(buf, "%x %x %x %x %x %x",
 +                      &vendor, &device, &subvendor, &subdevice,
 +                      &class, &class_mask);
 +      if (fields < 2)
 +              return -EINVAL;
 +
 +      spin_lock(&pdrv->dynids.lock);
 +      list_for_each_entry_safe(dynid, n, &pdrv->dynids.list, node) {
 +              struct pci_device_id *id = &dynid->id;
 +              if ((id->vendor == vendor) &&
 +                  (id->device == device) &&
 +                  (subvendor == PCI_ANY_ID || id->subvendor == subvendor) &&
 +                  (subdevice == PCI_ANY_ID || id->subdevice == subdevice) &&
 +                  !((id->class ^ class) & class_mask)) {
 +                      list_del(&dynid->node);
 +                      kfree(dynid);
 +                      retval = 0;
 +                      break;
 +              }
 +      }
 +      spin_unlock(&pdrv->dynids.lock);
 +
 +      if (retval)
 +              return retval;
 +      return count;
 +}
 +static DRIVER_ATTR(remove_id, S_IWUSR, NULL, store_remove_id);
 +
  static void
  pci_free_dynids(struct pci_driver *drv)
  {
@@@ -171,20 -125,6 +171,20 @@@ static void pci_remove_newid_file(struc
  {
        driver_remove_file(&drv->driver, &driver_attr_new_id);
  }
 +
 +static int
 +pci_create_removeid_file(struct pci_driver *drv)
 +{
 +      int error = 0;
 +      if (drv->probe != NULL)
 +              error = driver_create_file(&drv->driver,&driver_attr_remove_id);
 +      return error;
 +}
 +
 +static void pci_remove_removeid_file(struct pci_driver *drv)
 +{
 +      driver_remove_file(&drv->driver, &driver_attr_remove_id);
 +}
  #else /* !CONFIG_HOTPLUG */
  static inline void pci_free_dynids(struct pci_driver *drv) {}
  static inline int pci_create_newid_file(struct pci_driver *drv)
        return 0;
  }
  static inline void pci_remove_newid_file(struct pci_driver *drv) {}
 +static inline int pci_create_removeid_file(struct pci_driver *drv)
 +{
 +      return 0;
 +}
 +static inline void pci_remove_removeid_file(struct pci_driver *drv) {}
  #endif
  
  /**
@@@ -277,10 -212,9 +277,9 @@@ static int pci_call_probe(struct pci_dr
        node = dev_to_node(&dev->dev);
        if (node >= 0) {
                int cpu;
-               node_to_cpumask_ptr(nodecpumask, node);
  
                get_online_cpus();
-               cpu = cpumask_any_and(nodecpumask, cpu_online_mask);
+               cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask);
                if (cpu < nr_cpu_ids)
                        error = work_on_cpu(cpu, local_pci_probe, &ddi);
                else
@@@ -964,23 -898,13 +963,23 @@@ int __pci_register_driver(struct pci_dr
        /* register with core */
        error = driver_register(&drv->driver);
        if (error)
 -              return error;
 +              goto out;
  
        error = pci_create_newid_file(drv);
        if (error)
 -              driver_unregister(&drv->driver);
 +              goto out_newid;
  
 +      error = pci_create_removeid_file(drv);
 +      if (error)
 +              goto out_removeid;
 +out:
        return error;
 +
 +out_removeid:
 +      pci_remove_newid_file(drv);
 +out_newid:
 +      driver_unregister(&drv->driver);
 +      goto out;
  }
  
  /**
  void
  pci_unregister_driver(struct pci_driver *drv)
  {
 +      pci_remove_removeid_file(drv);
        pci_remove_newid_file(drv);
        driver_unregister(&drv->driver);
        pci_free_dynids(drv);
@@@ -1096,7 -1019,6 +1095,7 @@@ struct bus_type pci_bus_type = 
        .remove         = pci_device_remove,
        .shutdown       = pci_device_shutdown,
        .dev_attrs      = pci_dev_attrs,
 +      .bus_attrs      = pci_bus_attrs,
        .pm             = PCI_PM_OPS_PTR,
  };
  
diff --combined include/linux/mmzone.h
@@@ -764,12 -764,6 +764,6 @@@ extern int numa_zonelist_order_handler(
  extern char numa_zonelist_order[];
  #define NUMA_ZONELIST_ORDER_LEN 16    /* string buffer size */
  
- #include <linux/topology.h>
- /* Returns the number of the current Node. */
- #ifndef numa_node_id
- #define numa_node_id()                (cpu_to_node(raw_smp_processor_id()))
- #endif
  #ifndef CONFIG_NEED_MULTIPLE_NODES
  
  extern struct pglist_data contig_page_data;
@@@ -806,14 -800,6 +800,14 @@@ extern struct zone *next_zone(struct zo
             zone;                                      \
             zone = next_zone(zone))
  
 +#define for_each_populated_zone(zone)                 \
 +      for (zone = (first_online_pgdat())->node_zones; \
 +           zone;                                      \
 +           zone = next_zone(zone))                    \
 +              if (!populated_zone(zone))              \
 +                      ; /* do nothing */              \
 +              else
 +
  static inline struct zone *zonelist_zone(struct zoneref *zoneref)
  {
        return zoneref->zone;
diff --combined kernel/sched.c
@@@ -1110,7 -1110,7 +1110,7 @@@ static void hrtick_start(struct rq *rq
        if (rq == this_rq()) {
                hrtimer_restart(timer);
        } else if (!rq->hrtick_csd_pending) {
 -              __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd);
 +              __smp_call_function_single(cpu_of(rq), &rq->hrtick_csd, 0);
                rq->hrtick_csd_pending = 1;
        }
  }
@@@ -3818,19 -3818,23 +3818,23 @@@ find_busiest_queue(struct sched_group *
   */
  #define MAX_PINNED_INTERVAL   512
  
+ /* Working cpumask for load_balance and load_balance_newidle. */
+ static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
  /*
   * Check this_cpu to ensure it is balanced within domain. Attempt to move
   * tasks if there is an imbalance.
   */
  static int load_balance(int this_cpu, struct rq *this_rq,
                        struct sched_domain *sd, enum cpu_idle_type idle,
-                       int *balance, struct cpumask *cpus)
+                       int *balance)
  {
        int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
        struct sched_group *group;
        unsigned long imbalance;
        struct rq *busiest;
        unsigned long flags;
+       struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
  
        cpumask_setall(cpus);
  
@@@ -3985,8 -3989,7 +3989,7 @@@ out
   * this_rq is locked.
   */
  static int
- load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd,
-                       struct cpumask *cpus)
+ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
  {
        struct sched_group *group;
        struct rq *busiest = NULL;
        int ld_moved = 0;
        int sd_idle = 0;
        int all_pinned = 0;
+       struct cpumask *cpus = __get_cpu_var(load_balance_tmpmask);
  
        cpumask_setall(cpus);
  
@@@ -4134,10 -4138,6 +4138,6 @@@ static void idle_balance(int this_cpu, 
        struct sched_domain *sd;
        int pulled_task = 0;
        unsigned long next_balance = jiffies + HZ;
-       cpumask_var_t tmpmask;
-       if (!alloc_cpumask_var(&tmpmask, GFP_ATOMIC))
-               return;
  
        for_each_domain(this_cpu, sd) {
                unsigned long interval;
                if (sd->flags & SD_BALANCE_NEWIDLE)
                        /* If we've pulled tasks over stop searching: */
                        pulled_task = load_balance_newidle(this_cpu, this_rq,
-                                                          sd, tmpmask);
+                                                          sd);
  
                interval = msecs_to_jiffies(sd->balance_interval);
                if (time_after(next_balance, sd->last_balance + interval))
                 */
                this_rq->next_balance = next_balance;
        }
-       free_cpumask_var(tmpmask);
  }
  
  /*
@@@ -4313,11 -4312,6 +4312,6 @@@ static void rebalance_domains(int cpu, 
        unsigned long next_balance = jiffies + 60*HZ;
        int update_next_balance = 0;
        int need_serialize;
-       cpumask_var_t tmp;
-       /* Fails alloc?  Rebalancing probably not a priority right now. */
-       if (!alloc_cpumask_var(&tmp, GFP_ATOMIC))
-               return;
  
        for_each_domain(cpu, sd) {
                if (!(sd->flags & SD_LOAD_BALANCE))
                }
  
                if (time_after_eq(jiffies, sd->last_balance + interval)) {
-                       if (load_balance(cpu, rq, sd, idle, &balance, tmp)) {
+                       if (load_balance(cpu, rq, sd, idle, &balance)) {
                                /*
                                 * We've pulled tasks over so either we're no
                                 * longer idle, or one of our SMT siblings is
@@@ -4376,8 -4370,6 +4370,6 @@@ out
         */
        if (likely(update_next_balance))
                rq->next_balance = next_balance;
-       free_cpumask_var(tmp);
  }
  
  /*
@@@ -5196,17 -5188,11 +5188,17 @@@ void __wake_up_locked(wait_queue_head_
        __wake_up_common(q, mode, 1, 0, NULL);
  }
  
 +void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
 +{
 +      __wake_up_common(q, mode, 1, 0, key);
 +}
 +
  /**
 - * __wake_up_sync - wake up threads blocked on a waitqueue.
 + * __wake_up_sync_key - wake up threads blocked on a waitqueue.
   * @q: the waitqueue
   * @mode: which threads
   * @nr_exclusive: how many wake-one or wake-many threads to wake up
 + * @key: opaque value to be passed to wakeup targets
   *
   * The sync wakeup differs that the waker knows that it will schedule
   * away soon, so while the target thread will be woken up, it will not
   *
   * On UP it can prevent extra preemption.
   */
 -void
 -__wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
 +void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode,
 +                      int nr_exclusive, void *key)
  {
        unsigned long flags;
        int sync = 1;
                sync = 0;
  
        spin_lock_irqsave(&q->lock, flags);
 -      __wake_up_common(q, mode, nr_exclusive, sync, NULL);
 +      __wake_up_common(q, mode, nr_exclusive, sync, key);
        spin_unlock_irqrestore(&q->lock, flags);
  }
 +EXPORT_SYMBOL_GPL(__wake_up_sync_key);
 +
 +/*
 + * __wake_up_sync - see __wake_up_sync_key()
 + */
 +void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr_exclusive)
 +{
 +      __wake_up_sync_key(q, mode, nr_exclusive, NULL);
 +}
  EXPORT_SYMBOL_GPL(__wake_up_sync);    /* For internal use only */
  
  /**
@@@ -7728,7 -7705,7 +7720,7 @@@ cpu_to_core_group(int cpu, const struc
  {
        int group;
  
-       cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
+       cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
        group = cpumask_first(mask);
        if (sg)
                *sg = &per_cpu(sched_group_core, group).sg;
@@@ -7757,7 -7734,7 +7749,7 @@@ cpu_to_phys_group(int cpu, const struc
        cpumask_and(mask, cpu_coregroup_mask(cpu), cpu_map);
        group = cpumask_first(mask);
  #elif defined(CONFIG_SCHED_SMT)
-       cpumask_and(mask, &per_cpu(cpu_sibling_map, cpu), cpu_map);
+       cpumask_and(mask, topology_thread_cpumask(cpu), cpu_map);
        group = cpumask_first(mask);
  #else
        group = cpu;
@@@ -8100,7 -8077,7 +8092,7 @@@ static int __build_sched_domains(const 
                SD_INIT(sd, SIBLING);
                set_domain_attribute(sd, attr);
                cpumask_and(sched_domain_span(sd),
-                           &per_cpu(cpu_sibling_map, i), cpu_map);
+                           topology_thread_cpumask(i), cpu_map);
                sd->parent = p;
                p->child = sd;
                cpu_to_cpu_group(i, cpu_map, &sd->groups, tmpmask);
        /* Set up CPU (sibling) groups */
        for_each_cpu(i, cpu_map) {
                cpumask_and(this_sibling_map,
-                           &per_cpu(cpu_sibling_map, i), cpu_map);
+                           topology_thread_cpumask(i), cpu_map);
                if (i != cpumask_first(this_sibling_map))
                        continue;
  
@@@ -8787,6 -8764,9 +8779,9 @@@ void __init sched_init(void
  #ifdef CONFIG_USER_SCHED
        alloc_size *= 2;
  #endif
+ #ifdef CONFIG_CPUMASK_OFFSTACK
+       alloc_size += num_possible_cpus() * cpumask_size();
+ #endif
        /*
         * As sched_init() is called before page_alloc is setup,
         * we use alloc_bootmem().
                ptr += nr_cpu_ids * sizeof(void **);
  #endif /* CONFIG_USER_SCHED */
  #endif /* CONFIG_RT_GROUP_SCHED */
+ #ifdef CONFIG_CPUMASK_OFFSTACK
+               for_each_possible_cpu(i) {
+                       per_cpu(load_balance_tmpmask, i) = (void *)ptr;
+                       ptr += cpumask_size();
+               }
+ #endif /* CONFIG_CPUMASK_OFFSTACK */
        }
  
  #ifdef CONFIG_SMP
diff --combined mm/page_alloc.c
@@@ -331,7 -331,7 +331,7 @@@ static int destroy_compound_page(struc
        for (i = 1; i < nr_pages; i++) {
                struct page *p = page + i;
  
 -              if (unlikely(!PageTail(p) | (p->first_page != page))) {
 +              if (unlikely(!PageTail(p) || (p->first_page != page))) {
                        bad_page(page);
                        bad++;
                }
@@@ -922,10 -922,13 +922,10 @@@ static void drain_pages(unsigned int cp
        unsigned long flags;
        struct zone *zone;
  
 -      for_each_zone(zone) {
 +      for_each_populated_zone(zone) {
                struct per_cpu_pageset *pset;
                struct per_cpu_pages *pcp;
  
 -              if (!populated_zone(zone))
 -                      continue;
 -
                pset = zone_pcp(zone, cpu);
  
                pcp = &pset->pcp;
@@@ -1582,8 -1585,7 +1582,8 @@@ nofail_alloc
        reclaim_state.reclaimed_slab = 0;
        p->reclaim_state = &reclaim_state;
  
 -      did_some_progress = try_to_free_pages(zonelist, order, gfp_mask);
 +      did_some_progress = try_to_free_pages(zonelist, order,
 +                                              gfp_mask, nodemask);
  
        p->reclaim_state = NULL;
        lockdep_clear_current_reclaim_state();
@@@ -1877,7 -1879,10 +1877,7 @@@ void show_free_areas(void
        int cpu;
        struct zone *zone;
  
 -      for_each_zone(zone) {
 -              if (!populated_zone(zone))
 -                      continue;
 -
 +      for_each_populated_zone(zone) {
                show_node(zone);
                printk("%s per-cpu:\n", zone->name);
  
                global_page_state(NR_PAGETABLE),
                global_page_state(NR_BOUNCE));
  
 -      for_each_zone(zone) {
 +      for_each_populated_zone(zone) {
                int i;
  
 -              if (!populated_zone(zone))
 -                      continue;
 -
                show_node(zone);
                printk("%s"
                        " free:%lukB"
                printk("\n");
        }
  
 -      for_each_zone(zone) {
 +      for_each_populated_zone(zone) {
                unsigned long nr[MAX_ORDER], flags, order, total = 0;
  
 -              if (!populated_zone(zone))
 -                      continue;
 -
                show_node(zone);
                printk("%s: ", zone->name);
  
@@@ -2128,7 -2139,7 +2128,7 @@@ static int find_next_best_node(int node
        int n, val;
        int min_val = INT_MAX;
        int best_node = -1;
-       node_to_cpumask_ptr(tmp, 0);
+       const struct cpumask *tmp = cpumask_of_node(0);
  
        /* Use the local node if we haven't already */
        if (!node_isset(node, *used_node_mask)) {
                val += (n < node);
  
                /* Give preference to headless and unused nodes */
-               node_to_cpumask_ptr_next(tmp, n);
-               if (!cpus_empty(*tmp))
+               tmp = cpumask_of_node(n);
+               if (!cpumask_empty(tmp))
                        val += PENALTY_FOR_NODE_WITH_CPUS;
  
                /* Slight preference for less loaded node */
@@@ -2773,7 -2784,11 +2773,7 @@@ static int __cpuinit process_zones(int 
  
        node_set_state(node, N_CPU);    /* this node has a cpu */
  
 -      for_each_zone(zone) {
 -
 -              if (!populated_zone(zone))
 -                      continue;
 -
 +      for_each_populated_zone(zone) {
                zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset),
                                         GFP_KERNEL, node);
                if (!zone_pcp(zone, cpu))
diff --combined mm/slab.c
+++ b/mm/slab.c
@@@ -1160,7 -1160,7 +1160,7 @@@ static void __cpuinit cpuup_canceled(lo
        struct kmem_cache *cachep;
        struct kmem_list3 *l3 = NULL;
        int node = cpu_to_node(cpu);
-       node_to_cpumask_ptr(mask, node);
+       const struct cpumask *mask = cpumask_of_node(node);
  
        list_for_each_entry(cachep, &cache_chain, next) {
                struct array_cache *nc;
@@@ -3992,7 -3992,8 +3992,7 @@@ static void cache_reap(struct work_stru
        struct kmem_cache *searchp;
        struct kmem_list3 *l3;
        int node = numa_node_id();
 -      struct delayed_work *work =
 -              container_of(w, struct delayed_work, work);
 +      struct delayed_work *work = to_delayed_work(w);
  
        if (!mutex_trylock(&cache_chain_mutex))
                /* Give up. Setup the next iteration. */
diff --combined mm/vmscan.c
@@@ -60,8 -60,8 +60,8 @@@ struct scan_control 
  
        int may_writepage;
  
 -      /* Can pages be swapped as part of reclaim? */
 -      int may_swap;
 +      /* Can mapped pages be reclaimed? */
 +      int may_unmap;
  
        /* This context's SWAP_CLUSTER_MAX. If freeing memory for
         * suspend, we effectively ignore SWAP_CLUSTER_MAX.
        /* Which cgroup do we reclaim from */
        struct mem_cgroup *mem_cgroup;
  
 +      /*
 +       * Nodemask of nodes allowed by the caller. If NULL, all nodes
 +       * are scanned.
 +       */
 +      nodemask_t      *nodemask;
 +
        /* Pluggable isolate pages callback */
        unsigned long (*isolate_pages)(unsigned long nr, struct list_head *dst,
                        unsigned long *scanned, int order, int mode,
@@@ -220,9 -214,8 +220,9 @@@ unsigned long shrink_slab(unsigned lon
                do_div(delta, lru_pages + 1);
                shrinker->nr += delta;
                if (shrinker->nr < 0) {
 -                      printk(KERN_ERR "%s: nr=%ld\n",
 -                                      __func__, shrinker->nr);
 +                      printk(KERN_ERR "shrink_slab: %pF negative objects to "
 +                             "delete nr=%ld\n",
 +                             shrinker->shrink, shrinker->nr);
                        shrinker->nr = max_pass;
                }
  
@@@ -283,7 -276,7 +283,7 @@@ static inline int page_mapping_inuse(st
  
  static inline int is_page_cache_freeable(struct page *page)
  {
 -      return page_count(page) - !!PagePrivate(page) == 2;
 +      return page_count(page) - !!page_has_private(page) == 2;
  }
  
  static int may_write_to_queue(struct backing_dev_info *bdi)
@@@ -367,7 -360,7 +367,7 @@@ static pageout_t pageout(struct page *p
                 * Some data journaling orphaned pages can have
                 * page->mapping == NULL while being dirty with clean buffers.
                 */
 -              if (PagePrivate(page)) {
 +              if (page_has_private(page)) {
                        if (try_to_free_buffers(page)) {
                                ClearPageDirty(page);
                                printk("%s: orphaned page\n", __func__);
@@@ -613,7 -606,7 +613,7 @@@ static unsigned long shrink_page_list(s
                if (unlikely(!page_evictable(page, NULL)))
                        goto cull_mlocked;
  
 -              if (!sc->may_swap && page_mapped(page))
 +              if (!sc->may_unmap && page_mapped(page))
                        goto keep_locked;
  
                /* Double the slab pressure for mapped and swapcache pages */
                 * process address space (page_count == 1) it can be freed.
                 * Otherwise, leave the page on the LRU so it is swappable.
                 */
 -              if (PagePrivate(page)) {
 +              if (page_has_private(page)) {
                        if (!try_to_release_page(page, sc->gfp_mask))
                                goto activate_locked;
                        if (!mapping && page_count(page) == 1) {
@@@ -1305,11 -1298,17 +1305,11 @@@ static void shrink_active_list(unsigne
        }
        __mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved);
        pgdeactivate += pgmoved;
        __count_zone_vm_events(PGREFILL, zone, pgscanned);
        __count_vm_events(PGDEACTIVATE, pgdeactivate);
        spin_unlock_irq(&zone->lru_lock);
 -      if (vm_swap_full())
 -              pagevec_swap_free(&pvec);
 -
 +      if (buffer_heads_over_limit)
 +              pagevec_strip(&pvec);
        pagevec_release(&pvec);
  }
  
@@@ -1544,8 -1543,7 +1544,8 @@@ static void shrink_zones(int priority, 
        struct zone *zone;
  
        sc->all_unreclaimable = 1;
 -      for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
 +      for_each_zone_zonelist_nodemask(zone, z, zonelist, high_zoneidx,
 +                                      sc->nodemask) {
                if (!populated_zone(zone))
                        continue;
                /*
  }
  
  unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 -                                                              gfp_t gfp_mask)
 +                              gfp_t gfp_mask, nodemask_t *nodemask)
  {
        struct scan_control sc = {
                .gfp_mask = gfp_mask,
                .may_writepage = !laptop_mode,
                .swap_cluster_max = SWAP_CLUSTER_MAX,
 -              .may_swap = 1,
 +              .may_unmap = 1,
                .swappiness = vm_swappiness,
                .order = order,
                .mem_cgroup = NULL,
                .isolate_pages = isolate_pages_global,
 +              .nodemask = nodemask,
        };
  
        return do_try_to_free_pages(zonelist, &sc);
@@@ -1716,18 -1713,17 +1716,18 @@@ unsigned long try_to_free_mem_cgroup_pa
  {
        struct scan_control sc = {
                .may_writepage = !laptop_mode,
 -              .may_swap = 1,
 +              .may_unmap = 1,
                .swap_cluster_max = SWAP_CLUSTER_MAX,
                .swappiness = swappiness,
                .order = 0,
                .mem_cgroup = mem_cont,
                .isolate_pages = mem_cgroup_isolate_pages,
 +              .nodemask = NULL, /* we don't care the placement */
        };
        struct zonelist *zonelist;
  
        if (noswap)
 -              sc.may_swap = 0;
 +              sc.may_unmap = 0;
  
        sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
                        (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
@@@ -1766,7 -1762,7 +1766,7 @@@ static unsigned long balance_pgdat(pg_d
        struct reclaim_state *reclaim_state = current->reclaim_state;
        struct scan_control sc = {
                .gfp_mask = GFP_KERNEL,
 -              .may_swap = 1,
 +              .may_unmap = 1,
                .swap_cluster_max = SWAP_CLUSTER_MAX,
                .swappiness = vm_swappiness,
                .order = order,
@@@ -1967,7 -1963,7 +1967,7 @@@ static int kswapd(void *p
        struct reclaim_state reclaim_state = {
                .reclaimed_slab = 0,
        };
-       node_to_cpumask_ptr(cpumask, pgdat->node_id);
+       const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
  
        lockdep_set_current_reclaim_state(GFP_KERNEL);
  
@@@ -2054,19 -2050,22 +2054,19 @@@ unsigned long global_lru_pages(void
  #ifdef CONFIG_PM
  /*
   * Helper function for shrink_all_memory().  Tries to reclaim 'nr_pages' pages
 - * from LRU lists system-wide, for given pass and priority, and returns the
 - * number of reclaimed pages
 + * from LRU lists system-wide, for given pass and priority.
   *
   * For pass > 3 we also try to shrink the LRU lists that contain a few pages
   */
 -static unsigned long shrink_all_zones(unsigned long nr_pages, int prio,
 +static void shrink_all_zones(unsigned long nr_pages, int prio,
                                      int pass, struct scan_control *sc)
  {
        struct zone *zone;
 -      unsigned long ret = 0;
 +      unsigned long nr_reclaimed = 0;
  
 -      for_each_zone(zone) {
 +      for_each_populated_zone(zone) {
                enum lru_list l;
  
 -              if (!populated_zone(zone))
 -                      continue;
                if (zone_is_all_unreclaimable(zone) && prio != DEF_PRIORITY)
                        continue;
  
  
                                zone->lru[l].nr_scan = 0;
                                nr_to_scan = min(nr_pages, lru_pages);
 -                              ret += shrink_list(l, nr_to_scan, zone,
 +                              nr_reclaimed += shrink_list(l, nr_to_scan, zone,
                                                                sc, prio);
 -                              if (ret >= nr_pages)
 -                                      return ret;
 +                              if (nr_reclaimed >= nr_pages) {
 +                                      sc->nr_reclaimed = nr_reclaimed;
 +                                      return;
 +                              }
                        }
                }
        }
 -      return ret;
 +      sc->nr_reclaimed = nr_reclaimed;
  }
  
  /*
  unsigned long shrink_all_memory(unsigned long nr_pages)
  {
        unsigned long lru_pages, nr_slab;
 -      unsigned long ret = 0;
        int pass;
        struct reclaim_state reclaim_state;
        struct scan_control sc = {
                .gfp_mask = GFP_KERNEL,
 -              .may_swap = 0,
 -              .swap_cluster_max = nr_pages,
 +              .may_unmap = 0,
                .may_writepage = 1,
                .isolate_pages = isolate_pages_global,
        };
                if (!reclaim_state.reclaimed_slab)
                        break;
  
 -              ret += reclaim_state.reclaimed_slab;
 -              if (ret >= nr_pages)
 +              sc.nr_reclaimed += reclaim_state.reclaimed_slab;
 +              if (sc.nr_reclaimed >= nr_pages)
                        goto out;
  
                nr_slab -= reclaim_state.reclaimed_slab;
  
                /* Force reclaiming mapped pages in the passes #3 and #4 */
                if (pass > 2)
 -                      sc.may_swap = 1;
 +                      sc.may_unmap = 1;
  
                for (prio = DEF_PRIORITY; prio >= 0; prio--) {
 -                      unsigned long nr_to_scan = nr_pages - ret;
 +                      unsigned long nr_to_scan = nr_pages - sc.nr_reclaimed;
  
                        sc.nr_scanned = 0;
 -                      ret += shrink_all_zones(nr_to_scan, prio, pass, &sc);
 -                      if (ret >= nr_pages)
 +                      sc.swap_cluster_max = nr_to_scan;
 +                      shrink_all_zones(nr_to_scan, prio, pass, &sc);
 +                      if (sc.nr_reclaimed >= nr_pages)
                                goto out;
  
                        reclaim_state.reclaimed_slab = 0;
                        shrink_slab(sc.nr_scanned, sc.gfp_mask,
                                        global_lru_pages());
 -                      ret += reclaim_state.reclaimed_slab;
 -                      if (ret >= nr_pages)
 +                      sc.nr_reclaimed += reclaim_state.reclaimed_slab;
 +                      if (sc.nr_reclaimed >= nr_pages)
                                goto out;
  
                        if (sc.nr_scanned && prio < DEF_PRIORITY - 2)
        }
  
        /*
 -       * If ret = 0, we could not shrink LRUs, but there may be something
 -       * in slab caches
 +       * If sc.nr_reclaimed = 0, we could not shrink LRUs, but there may be
 +       * something in slab caches
         */
 -      if (!ret) {
 +      if (!sc.nr_reclaimed) {
                do {
                        reclaim_state.reclaimed_slab = 0;
                        shrink_slab(nr_pages, sc.gfp_mask, global_lru_pages());
 -                      ret += reclaim_state.reclaimed_slab;
 -              } while (ret < nr_pages && reclaim_state.reclaimed_slab > 0);
 +                      sc.nr_reclaimed += reclaim_state.reclaimed_slab;
 +              } while (sc.nr_reclaimed < nr_pages &&
 +                              reclaim_state.reclaimed_slab > 0);
        }
  
 +
  out:
        current->reclaim_state = NULL;
  
 -      return ret;
 +      return sc.nr_reclaimed;
  }
  #endif
  
@@@ -2204,7 -2200,9 +2204,9 @@@ static int __devinit cpu_callback(struc
        if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN) {
                for_each_node_state(nid, N_HIGH_MEMORY) {
                        pg_data_t *pgdat = NODE_DATA(nid);
-                       node_to_cpumask_ptr(mask, pgdat->node_id);
+                       const struct cpumask *mask;
+                       mask = cpumask_of_node(pgdat->node_id);
  
                        if (cpumask_any_and(cpu_online_mask, mask) < nr_cpu_ids)
                                /* One of our CPUs online: restore mask */
@@@ -2294,12 -2292,11 +2296,12 @@@ static int __zone_reclaim(struct zone *
        int priority;
        struct scan_control sc = {
                .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE),
 -              .may_swap = !!(zone_reclaim_mode & RECLAIM_SWAP),
 +              .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP),
                .swap_cluster_max = max_t(unsigned long, nr_pages,
                                        SWAP_CLUSTER_MAX),
                .gfp_mask = gfp_mask,
                .swappiness = vm_swappiness,
 +              .order = order,
                .isolate_pages = isolate_pages_global,
        };
        unsigned long slab_reclaimable;
diff --combined net/sunrpc/svc.c
@@@ -317,8 -317,7 +317,7 @@@ svc_pool_map_set_cpumask(struct task_st
        }
        case SVC_POOL_PERNODE:
        {
-               node_to_cpumask_ptr(nodecpumask, node);
-               set_cpus_allowed_ptr(task, nodecpumask);
+               set_cpus_allowed_ptr(task, cpumask_of_node(node));
                break;
        }
        }
@@@ -359,7 -358,7 +358,7 @@@ svc_pool_for_cpu(struct svc_serv *serv
   */
  static struct svc_serv *
  __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
 -         sa_family_t family, void (*shutdown)(struct svc_serv *serv))
 +           void (*shutdown)(struct svc_serv *serv))
  {
        struct svc_serv *serv;
        unsigned int vers;
  
        if (!(serv = kzalloc(sizeof(*serv), GFP_KERNEL)))
                return NULL;
 -      serv->sv_family    = family;
        serv->sv_name      = prog->pg_name;
        serv->sv_program   = prog;
        serv->sv_nrthreads = 1;
  
  struct svc_serv *
  svc_create(struct svc_program *prog, unsigned int bufsize,
 -              sa_family_t family, void (*shutdown)(struct svc_serv *serv))
 +         void (*shutdown)(struct svc_serv *serv))
  {
 -      return __svc_create(prog, bufsize, /*npools*/1, family, shutdown);
 +      return __svc_create(prog, bufsize, /*npools*/1, shutdown);
  }
  EXPORT_SYMBOL_GPL(svc_create);
  
  struct svc_serv *
  svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
 -                sa_family_t family, void (*shutdown)(struct svc_serv *serv),
 +                void (*shutdown)(struct svc_serv *serv),
                  svc_thread_fn func, struct module *mod)
  {
        struct svc_serv *serv;
        unsigned int npools = svc_pool_map_get();
  
 -      serv = __svc_create(prog, bufsize, npools, family, shutdown);
 +      serv = __svc_create(prog, bufsize, npools, shutdown);
  
        if (serv != NULL) {
                serv->sv_function = func;
@@@ -718,6 -718,8 +717,6 @@@ svc_exit_thread(struct svc_rqst *rqstp
  }
  EXPORT_SYMBOL_GPL(svc_exit_thread);
  
 -#ifdef CONFIG_SUNRPC_REGISTER_V4
 -
  /*
   * Register an "inet" protocol family netid with the local
   * rpcbind daemon via an rpcbind v4 SET request.
@@@ -732,13 -734,12 +731,13 @@@ static int __svc_rpcb_register4(const u
                                const unsigned short protocol,
                                const unsigned short port)
  {
 -      struct sockaddr_in sin = {
 +      const struct sockaddr_in sin = {
                .sin_family             = AF_INET,
                .sin_addr.s_addr        = htonl(INADDR_ANY),
                .sin_port               = htons(port),
        };
 -      char *netid;
 +      const char *netid;
 +      int error;
  
        switch (protocol) {
        case IPPROTO_UDP:
                netid = RPCBIND_NETID_TCP;
                break;
        default:
 -              return -EPROTONOSUPPORT;
 +              return -ENOPROTOOPT;
        }
  
 -      return rpcb_v4_register(program, version,
 -                              (struct sockaddr *)&sin, netid);
 +      error = rpcb_v4_register(program, version,
 +                                      (const struct sockaddr *)&sin, netid);
 +
 +      /*
 +       * User space didn't support rpcbind v4, so retry this
 +       * registration request with the legacy rpcbind v2 protocol.
 +       */
 +      if (error == -EPROTONOSUPPORT)
 +              error = rpcb_register(program, version, protocol, port);
 +
 +      return error;
  }
  
 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
  /*
   * Register an "inet6" protocol family netid with the local
   * rpcbind daemon via an rpcbind v4 SET request.
@@@ -779,13 -770,12 +778,13 @@@ static int __svc_rpcb_register6(const u
                                const unsigned short protocol,
                                const unsigned short port)
  {
 -      struct sockaddr_in6 sin6 = {
 +      const struct sockaddr_in6 sin6 = {
                .sin6_family            = AF_INET6,
                .sin6_addr              = IN6ADDR_ANY_INIT,
                .sin6_port              = htons(port),
        };
 -      char *netid;
 +      const char *netid;
 +      int error;
  
        switch (protocol) {
        case IPPROTO_UDP:
                netid = RPCBIND_NETID_TCP6;
                break;
        default:
 -              return -EPROTONOSUPPORT;
 +              return -ENOPROTOOPT;
        }
  
 -      return rpcb_v4_register(program, version,
 -                              (struct sockaddr *)&sin6, netid);
 +      error = rpcb_v4_register(program, version,
 +                                      (const struct sockaddr *)&sin6, netid);
 +
 +      /*
 +       * User space didn't support rpcbind version 4, so we won't
 +       * use a PF_INET6 listener.
 +       */
 +      if (error == -EPROTONOSUPPORT)
 +              error = -EAFNOSUPPORT;
 +
 +      return error;
  }
 +#endif        /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
  
  /*
   * Register a kernel RPC service via rpcbind version 4.
   * Returns zero on success; a negative errno value is returned
   * if any error occurs.
   */
 -static int __svc_register(const u32 program, const u32 version,
 -                        const sa_family_t family,
 +static int __svc_register(const char *progname,
 +                        const u32 program, const u32 version,
 +                        const int family,
                          const unsigned short protocol,
                          const unsigned short port)
  {
 -      int error;
 +      int error = -EAFNOSUPPORT;
  
        switch (family) {
 -      case AF_INET:
 -              return __svc_rpcb_register4(program, version,
 +      case PF_INET:
 +              error = __svc_rpcb_register4(program, version,
                                                protocol, port);
 -      case AF_INET6:
 +              break;
 +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 +      case PF_INET6:
                error = __svc_rpcb_register6(program, version,
                                                protocol, port);
 -              if (error < 0)
 -                      return error;
 -
 -              /*
 -               * Work around bug in some versions of Linux rpcbind
 -               * which don't allow registration of both inet and
 -               * inet6 netids.
 -               *
 -               * Error return ignored for now.
 -               */
 -              __svc_rpcb_register4(program, version,
 -                                              protocol, port);
 -              return 0;
 +#endif        /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
        }
  
 -      return -EAFNOSUPPORT;
 -}
 -
 -#else /* CONFIG_SUNRPC_REGISTER_V4 */
 -
 -/*
 - * Register a kernel RPC service via rpcbind version 2.
 - *
 - * Returns zero on success; a negative errno value is returned
 - * if any error occurs.
 - */
 -static int __svc_register(const u32 program, const u32 version,
 -                        sa_family_t family,
 -                        const unsigned short protocol,
 -                        const unsigned short port)
 -{
 -      if (family != AF_INET)
 -              return -EAFNOSUPPORT;
 -
 -      return rpcb_register(program, version, protocol, port);
 +      if (error < 0)
 +              printk(KERN_WARNING "svc: failed to register %sv%u RPC "
 +                      "service (errno %d).\n", progname, version, -error);
 +      return error;
  }
  
  /**
   * svc_register - register an RPC service with the local portmapper
   * @serv: svc_serv struct for the service to register
 + * @family: protocol family of service's listener socket
   * @proto: transport protocol number to advertise
   * @port: port to advertise
   *
 - * Service is registered for any address in serv's address family
 + * Service is registered for any address in the passed-in protocol family
   */
 -int svc_register(const struct svc_serv *serv, const unsigned short proto,
 -               const unsigned short port)
 +int svc_register(const struct svc_serv *serv, const int family,
 +               const unsigned short proto, const unsigned short port)
  {
        struct svc_program      *progp;
        unsigned int            i;
                                        i,
                                        proto == IPPROTO_UDP?  "udp" : "tcp",
                                        port,
 -                                      serv->sv_family,
 +                                      family,
                                        progp->pg_vers[i]->vs_hidden?
                                                " (but not telling portmap)" : "");
  
                        if (progp->pg_vers[i]->vs_hidden)
                                continue;
  
 -                      error = __svc_register(progp->pg_prog, i,
 -                                              serv->sv_family, proto, port);
 +                      error = __svc_register(progp->pg_name, progp->pg_prog,
 +                                              i, family, proto, port);
                        if (error < 0)
                                break;
                }
        return error;
  }
  
 -#ifdef CONFIG_SUNRPC_REGISTER_V4
 -
 +/*
 + * If user space is running rpcbind, it should take the v4 UNSET
 + * and clear everything for this [program, version].  If user space
 + * is running portmap, it will reject the v4 UNSET, but won't have
 + * any "inet6" entries anyway.  So a PMAP_UNSET should be sufficient
 + * in this case to clear all existing entries for [program, version].
 + */
  static void __svc_unregister(const u32 program, const u32 version,
                             const char *progname)
  {
 -      struct sockaddr_in6 sin6 = {
 -              .sin6_family            = AF_INET6,
 -              .sin6_addr              = IN6ADDR_ANY_INIT,
 -              .sin6_port              = 0,
 -      };
        int error;
  
 -      error = rpcb_v4_register(program, version,
 -                              (struct sockaddr *)&sin6, "");
 -      dprintk("svc: %s(%sv%u), error %d\n",
 -                      __func__, progname, version, error);
 -}
 -
 -#else /* CONFIG_SUNRPC_REGISTER_V4 */
 +      error = rpcb_v4_register(program, version, NULL, "");
  
 -static void __svc_unregister(const u32 program, const u32 version,
 -                           const char *progname)
 -{
 -      int error;
 +      /*
 +       * User space didn't support rpcbind v4, so retry this
 +       * request with the legacy rpcbind v2 protocol.
 +       */
 +      if (error == -EPROTONOSUPPORT)
 +              error = rpcb_register(program, version, 0, 0);
  
 -      error = rpcb_register(program, version, 0, 0);
        dprintk("svc: %s(%sv%u), error %d\n",
                        __func__, progname, version, error);
  }
  
 -#endif        /* CONFIG_SUNRPC_REGISTER_V4 */
 -
  /*
   * All netids, bind addresses and ports registered for [program, version]
   * are removed from the local rpcbind database (if the service is not