Merge branch 'core-iommu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[linux-2.6.git] / drivers / xen / balloon.c
index 718050a..f54290b 100644 (file)
@@ -1,6 +1,4 @@
 /******************************************************************************
- * balloon.c
- *
  * Xen balloon driver - enables returning/claiming memory to/from Xen.
  *
  * Copyright (c) 2003, B Dragovic
@@ -33,7 +31,6 @@
  */
 
 #include <linux/kernel.h>
-#include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/errno.h>
 #include <linux/mm.h>
 #include <linux/highmem.h>
 #include <linux/mutex.h>
 #include <linux/list.h>
-#include <linux/sysdev.h>
 #include <linux/gfp.h>
 
 #include <asm/page.h>
 #include <asm/pgalloc.h>
 #include <asm/pgtable.h>
-#include <asm/uaccess.h>
 #include <asm/tlb.h>
 #include <asm/e820.h>
 
 #include <xen/xen.h>
 #include <xen/interface/xen.h>
 #include <xen/interface/memory.h>
-#include <xen/xenbus.h>
+#include <xen/balloon.h>
 #include <xen/features.h>
 #include <xen/page.h>
 
-#define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10))
-
-#define BALLOON_CLASS_NAME "xen_memory"
+/*
+ * balloon_process() state:
+ *
+ * BP_DONE: done or nothing to do,
+ * BP_EAGAIN: error, go to sleep,
+ * BP_ECANCELED: error, balloon operation canceled.
+ */
 
-struct balloon_stats {
-       /* We aim for 'current allocation' == 'target allocation'. */
-       unsigned long current_pages;
-       unsigned long target_pages;
-       /*
-        * Drivers may alter the memory reservation independently, but they
-        * must inform the balloon driver so we avoid hitting the hard limit.
-        */
-       unsigned long driver_pages;
-       /* Number of pages in high- and low-memory balloons. */
-       unsigned long balloon_low;
-       unsigned long balloon_high;
+enum bp_state {
+       BP_DONE,
+       BP_EAGAIN,
+       BP_ECANCELED
 };
 
-static DEFINE_MUTEX(balloon_mutex);
-
-static struct sys_device balloon_sysdev;
 
-static int register_balloon(struct sys_device *sysdev);
+static DEFINE_MUTEX(balloon_mutex);
 
-static struct balloon_stats balloon_stats;
+struct balloon_stats balloon_stats;
+EXPORT_SYMBOL_GPL(balloon_stats);
 
 /* We increase/decrease in batches which fit in a page */
 static unsigned long frame_list[PAGE_SIZE / sizeof(unsigned long)];
@@ -104,8 +93,7 @@ static LIST_HEAD(ballooned_pages);
 
 /* Main work function, always executed in process context. */
 static void balloon_process(struct work_struct *work);
-static DECLARE_WORK(balloon_worker, balloon_process);
-static struct timer_list balloon_timer;
+static DECLARE_DELAYED_WORK(balloon_worker, balloon_process);
 
 /* When ballooning out (allocating memory to return to Xen) we don't really
    want the kernel to try too hard since that can trigger the oom killer. */
@@ -126,7 +114,6 @@ static void __balloon_append(struct page *page)
        if (PageHighMem(page)) {
                list_add_tail(&page->lru, &ballooned_pages);
                balloon_stats.balloon_high++;
-               dec_totalhigh_pages();
        } else {
                list_add(&page->lru, &ballooned_pages);
                balloon_stats.balloon_low++;
@@ -136,18 +123,23 @@ static void __balloon_append(struct page *page)
 static void balloon_append(struct page *page)
 {
        __balloon_append(page);
+       if (PageHighMem(page))
+               dec_totalhigh_pages();
        totalram_pages--;
 }
 
 /* balloon_retrieve: rescue a page from the balloon, if it is not empty. */
-static struct page *balloon_retrieve(void)
+static struct page *balloon_retrieve(bool prefer_highmem)
 {
        struct page *page;
 
        if (list_empty(&ballooned_pages))
                return NULL;
 
-       page = list_entry(ballooned_pages.next, struct page, lru);
+       if (prefer_highmem)
+               page = list_entry(ballooned_pages.prev, struct page, lru);
+       else
+               page = list_entry(ballooned_pages.next, struct page, lru);
        list_del(&page->lru);
 
        if (PageHighMem(page)) {
@@ -177,12 +169,32 @@ static struct page *balloon_next_page(struct page *page)
        return list_entry(next, struct page, lru);
 }
 
-static void balloon_alarm(unsigned long unused)
+static enum bp_state update_schedule(enum bp_state state)
 {
-       schedule_work(&balloon_worker);
+       if (state == BP_DONE) {
+               balloon_stats.schedule_delay = 1;
+               balloon_stats.retry_count = 1;
+               return BP_DONE;
+       }
+
+       ++balloon_stats.retry_count;
+
+       if (balloon_stats.max_retry_count != RETRY_UNLIMITED &&
+                       balloon_stats.retry_count > balloon_stats.max_retry_count) {
+               balloon_stats.schedule_delay = 1;
+               balloon_stats.retry_count = 1;
+               return BP_ECANCELED;
+       }
+
+       balloon_stats.schedule_delay <<= 1;
+
+       if (balloon_stats.schedule_delay > balloon_stats.max_schedule_delay)
+               balloon_stats.schedule_delay = balloon_stats.max_schedule_delay;
+
+       return BP_EAGAIN;
 }
 
-static unsigned long current_target(void)
+static long current_credit(void)
 {
        unsigned long target = balloon_stats.target_pages;
 
@@ -191,14 +203,14 @@ static unsigned long current_target(void)
                     balloon_stats.balloon_low +
                     balloon_stats.balloon_high);
 
-       return target;
+       return target - balloon_stats.current_pages;
 }
 
-static int increase_reservation(unsigned long nr_pages)
+static enum bp_state increase_reservation(unsigned long nr_pages)
 {
+       int rc;
        unsigned long  pfn, i;
        struct page   *page;
-       long           rc;
        struct xen_memory_reservation reservation = {
                .address_bits = 0,
                .extent_order = 0,
@@ -210,7 +222,10 @@ static int increase_reservation(unsigned long nr_pages)
 
        page = balloon_first_page();
        for (i = 0; i < nr_pages; i++) {
-               BUG_ON(page == NULL);
+               if (!page) {
+                       nr_pages = i;
+                       break;
+               }
                frame_list[i] = page_to_pfn(page);
                page = balloon_next_page(page);
        }
@@ -218,11 +233,11 @@ static int increase_reservation(unsigned long nr_pages)
        set_xen_guest_handle(reservation.extent_start, frame_list);
        reservation.nr_extents = nr_pages;
        rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation);
-       if (rc < 0)
-               goto out;
+       if (rc <= 0)
+               return BP_EAGAIN;
 
        for (i = 0; i < rc; i++) {
-               page = balloon_retrieve();
+               page = balloon_retrieve(false);
                BUG_ON(page == NULL);
 
                pfn = page_to_pfn(page);
@@ -232,7 +247,7 @@ static int increase_reservation(unsigned long nr_pages)
                set_phys_to_machine(pfn, frame_list[i]);
 
                /* Link back into the page tables if not highmem. */
-               if (!xen_hvm_domain() && pfn < max_low_pfn) {
+               if (xen_pv_domain() && !PageHighMem(page)) {
                        int ret;
                        ret = HYPERVISOR_update_va_mapping(
                                (unsigned long)__va(pfn << PAGE_SHIFT),
@@ -249,15 +264,14 @@ static int increase_reservation(unsigned long nr_pages)
 
        balloon_stats.current_pages += rc;
 
- out:
-       return rc < 0 ? rc : rc != nr_pages;
+       return BP_DONE;
 }
 
-static int decrease_reservation(unsigned long nr_pages)
+static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
 {
+       enum bp_state state = BP_DONE;
        unsigned long  pfn, i;
        struct page   *page;
-       int            need_sleep = 0;
        int ret;
        struct xen_memory_reservation reservation = {
                .address_bits = 0,
@@ -269,9 +283,9 @@ static int decrease_reservation(unsigned long nr_pages)
                nr_pages = ARRAY_SIZE(frame_list);
 
        for (i = 0; i < nr_pages; i++) {
-               if ((page = alloc_page(GFP_BALLOON)) == NULL) {
+               if ((page = alloc_page(gfp)) == NULL) {
                        nr_pages = i;
-                       need_sleep = 1;
+                       state = BP_EAGAIN;
                        break;
                }
 
@@ -280,7 +294,7 @@ static int decrease_reservation(unsigned long nr_pages)
 
                scrub_page(page);
 
-               if (!xen_hvm_domain() && !PageHighMem(page)) {
+               if (xen_pv_domain() && !PageHighMem(page)) {
                        ret = HYPERVISOR_update_va_mapping(
                                (unsigned long)__va(pfn << PAGE_SHIFT),
                                __pte_ma(0), 0);
@@ -307,7 +321,7 @@ static int decrease_reservation(unsigned long nr_pages)
 
        balloon_stats.current_pages -= nr_pages;
 
-       return need_sleep;
+       return state;
 }
 
 /*
@@ -318,103 +332,121 @@ static int decrease_reservation(unsigned long nr_pages)
  */
 static void balloon_process(struct work_struct *work)
 {
-       int need_sleep = 0;
+       enum bp_state state = BP_DONE;
        long credit;
 
        mutex_lock(&balloon_mutex);
 
        do {
-               credit = current_target() - balloon_stats.current_pages;
+               credit = current_credit();
+
                if (credit > 0)
-                       need_sleep = (increase_reservation(credit) != 0);
+                       state = increase_reservation(credit);
+
                if (credit < 0)
-                       need_sleep = (decrease_reservation(-credit) != 0);
+                       state = decrease_reservation(-credit, GFP_BALLOON);
+
+               state = update_schedule(state);
 
 #ifndef CONFIG_PREEMPT
                if (need_resched())
                        schedule();
 #endif
-       } while ((credit != 0) && !need_sleep);
+       } while (credit && state == BP_DONE);
 
        /* Schedule more work if there is some still to be done. */
-       if (current_target() != balloon_stats.current_pages)
-               mod_timer(&balloon_timer, jiffies + HZ);
+       if (state == BP_EAGAIN)
+               schedule_delayed_work(&balloon_worker, balloon_stats.schedule_delay * HZ);
 
        mutex_unlock(&balloon_mutex);
 }
 
 /* Resets the Xen limit, sets new target, and kicks off processing. */
-static void balloon_set_new_target(unsigned long target)
+void balloon_set_new_target(unsigned long target)
 {
        /* No need for lock. Not read-modify-write updates. */
        balloon_stats.target_pages = target;
-       schedule_work(&balloon_worker);
+       schedule_delayed_work(&balloon_worker, 0);
 }
+EXPORT_SYMBOL_GPL(balloon_set_new_target);
 
-static struct xenbus_watch target_watch =
-{
-       .node = "memory/target"
-};
-
-/* React to a change in the target key */
-static void watch_target(struct xenbus_watch *watch,
-                        const char **vec, unsigned int len)
+/**
+ * alloc_xenballooned_pages - get pages that have been ballooned out
+ * @nr_pages: Number of pages to get
+ * @pages: pages returned
+ * @return 0 on success, -ENOMEM if the requested pages could not be obtained
+ */
+int alloc_xenballooned_pages(int nr_pages, struct page** pages)
 {
-       unsigned long long new_target;
-       int err;
-
-       err = xenbus_scanf(XBT_NIL, "memory", "target", "%llu", &new_target);
-       if (err != 1) {
-               /* This is ok (for domain0 at least) - so just return */
-               return;
+       int pgno = 0;
+       struct page* page;
+       mutex_lock(&balloon_mutex);
+       while (pgno < nr_pages) {
+               page = balloon_retrieve(true);
+               if (page) {
+                       pages[pgno++] = page;
+               } else {
+                       enum bp_state st;
+                       st = decrease_reservation(nr_pages - pgno, GFP_HIGHUSER);
+                       if (st != BP_DONE)
+                               goto out_undo;
+               }
        }
-
-       /* The given memory/target value is in KiB, so it needs converting to
-        * pages. PAGE_SHIFT converts bytes to pages, hence PAGE_SHIFT - 10.
-        */
-       balloon_set_new_target(new_target >> (PAGE_SHIFT - 10));
+       mutex_unlock(&balloon_mutex);
+       return 0;
+ out_undo:
+       while (pgno)
+               balloon_append(pages[--pgno]);
+       /* Free the memory back to the kernel soon */
+       schedule_delayed_work(&balloon_worker, 0);
+       mutex_unlock(&balloon_mutex);
+       return -ENOMEM;
 }
+EXPORT_SYMBOL(alloc_xenballooned_pages);
 
-static int balloon_init_watcher(struct notifier_block *notifier,
-                               unsigned long event,
-                               void *data)
+/**
+ * free_xenballooned_pages - return pages retrieved with alloc_xenballooned_pages
+ * @nr_pages: Number of pages
+ * @pages: pages to return
+ */
+void free_xenballooned_pages(int nr_pages, struct page** pages)
 {
-       int err;
+       int i;
 
-       err = register_xenbus_watch(&target_watch);
-       if (err)
-               printk(KERN_ERR "Failed to set balloon watcher\n");
+       mutex_lock(&balloon_mutex);
 
-       return NOTIFY_DONE;
-}
+       for (i = 0; i < nr_pages; i++) {
+               if (pages[i])
+                       balloon_append(pages[i]);
+       }
+
+       /* The balloon may be too large now. Shrink it if needed. */
+       if (current_credit())
+               schedule_delayed_work(&balloon_worker, 0);
 
-static struct notifier_block xenstore_notifier;
+       mutex_unlock(&balloon_mutex);
+}
+EXPORT_SYMBOL(free_xenballooned_pages);
 
 static int __init balloon_init(void)
 {
-       unsigned long pfn, nr_pages, extra_pfn_end;
+       unsigned long pfn, extra_pfn_end;
        struct page *page;
 
        if (!xen_domain())
                return -ENODEV;
 
-       pr_info("xen_balloon: Initialising balloon driver.\n");
+       pr_info("xen/balloon: Initialising balloon driver.\n");
 
-       if (xen_pv_domain())
-               nr_pages = xen_start_info->nr_pages;
-       else
-               nr_pages = max_pfn;
-       balloon_stats.current_pages = min(nr_pages, max_pfn);
+       balloon_stats.current_pages = xen_pv_domain() ? min(xen_start_info->nr_pages, max_pfn) : max_pfn;
        balloon_stats.target_pages  = balloon_stats.current_pages;
        balloon_stats.balloon_low   = 0;
        balloon_stats.balloon_high  = 0;
-       balloon_stats.driver_pages  = 0UL;
-
-       init_timer(&balloon_timer);
-       balloon_timer.data = 0;
-       balloon_timer.function = balloon_alarm;
 
-       register_balloon(&balloon_sysdev);
+       balloon_stats.schedule_delay = 1;
+       balloon_stats.max_schedule_delay = 32;
+       balloon_stats.retry_count = 1;
+       balloon_stats.max_retry_count = RETRY_UNLIMITED;
 
        /*
         * Initialise the balloon with excess memory space.  We need
@@ -431,158 +463,14 @@ static int __init balloon_init(void)
             pfn < extra_pfn_end;
             pfn++) {
                page = pfn_to_page(pfn);
-               /* totalram_pages doesn't include the boot-time
+               /* totalram_pages and totalhigh_pages do not include the boot-time
                   balloon extension, so don't subtract from it. */
                __balloon_append(page);
        }
 
-       target_watch.callback = watch_target;
-       xenstore_notifier.notifier_call = balloon_init_watcher;
-
-       register_xenstore_notifier(&xenstore_notifier);
-
        return 0;
 }
 
 subsys_initcall(balloon_init);
 
-static void balloon_exit(void)
-{
-    /* XXX - release balloon here */
-    return;
-}
-
-module_exit(balloon_exit);
-
-#define BALLOON_SHOW(name, format, args...)                            \
-       static ssize_t show_##name(struct sys_device *dev,              \
-                                  struct sysdev_attribute *attr,       \
-                                  char *buf)                           \
-       {                                                               \
-               return sprintf(buf, format, ##args);                    \
-       }                                                               \
-       static SYSDEV_ATTR(name, S_IRUGO, show_##name, NULL)
-
-BALLOON_SHOW(current_kb, "%lu\n", PAGES2KB(balloon_stats.current_pages));
-BALLOON_SHOW(low_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_low));
-BALLOON_SHOW(high_kb, "%lu\n", PAGES2KB(balloon_stats.balloon_high));
-BALLOON_SHOW(driver_kb, "%lu\n", PAGES2KB(balloon_stats.driver_pages));
-
-static ssize_t show_target_kb(struct sys_device *dev, struct sysdev_attribute *attr,
-                             char *buf)
-{
-       return sprintf(buf, "%lu\n", PAGES2KB(balloon_stats.target_pages));
-}
-
-static ssize_t store_target_kb(struct sys_device *dev,
-                              struct sysdev_attribute *attr,
-                              const char *buf,
-                              size_t count)
-{
-       char *endchar;
-       unsigned long long target_bytes;
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -EPERM;
-
-       target_bytes = simple_strtoull(buf, &endchar, 0) * 1024;
-
-       balloon_set_new_target(target_bytes >> PAGE_SHIFT);
-
-       return count;
-}
-
-static SYSDEV_ATTR(target_kb, S_IRUGO | S_IWUSR,
-                  show_target_kb, store_target_kb);
-
-
-static ssize_t show_target(struct sys_device *dev, struct sysdev_attribute *attr,
-                             char *buf)
-{
-       return sprintf(buf, "%llu\n",
-                      (unsigned long long)balloon_stats.target_pages
-                      << PAGE_SHIFT);
-}
-
-static ssize_t store_target(struct sys_device *dev,
-                           struct sysdev_attribute *attr,
-                           const char *buf,
-                           size_t count)
-{
-       char *endchar;
-       unsigned long long target_bytes;
-
-       if (!capable(CAP_SYS_ADMIN))
-               return -EPERM;
-
-       target_bytes = memparse(buf, &endchar);
-
-       balloon_set_new_target(target_bytes >> PAGE_SHIFT);
-
-       return count;
-}
-
-static SYSDEV_ATTR(target, S_IRUGO | S_IWUSR,
-                  show_target, store_target);
-
-
-static struct sysdev_attribute *balloon_attrs[] = {
-       &attr_target_kb,
-       &attr_target,
-};
-
-static struct attribute *balloon_info_attrs[] = {
-       &attr_current_kb.attr,
-       &attr_low_kb.attr,
-       &attr_high_kb.attr,
-       &attr_driver_kb.attr,
-       NULL
-};
-
-static struct attribute_group balloon_info_group = {
-       .name = "info",
-       .attrs = balloon_info_attrs,
-};
-
-static struct sysdev_class balloon_sysdev_class = {
-       .name = BALLOON_CLASS_NAME,
-};
-
-static int register_balloon(struct sys_device *sysdev)
-{
-       int i, error;
-
-       error = sysdev_class_register(&balloon_sysdev_class);
-       if (error)
-               return error;
-
-       sysdev->id = 0;
-       sysdev->cls = &balloon_sysdev_class;
-
-       error = sysdev_register(sysdev);
-       if (error) {
-               sysdev_class_unregister(&balloon_sysdev_class);
-               return error;
-       }
-
-       for (i = 0; i < ARRAY_SIZE(balloon_attrs); i++) {
-               error = sysdev_create_file(sysdev, balloon_attrs[i]);
-               if (error)
-                       goto fail;
-       }
-
-       error = sysfs_create_group(&sysdev->kobj, &balloon_info_group);
-       if (error)
-               goto fail;
-
-       return 0;
-
- fail:
-       while (--i >= 0)
-               sysdev_remove_file(sysdev, balloon_attrs[i]);
-       sysdev_unregister(sysdev);
-       sysdev_class_unregister(&balloon_sysdev_class);
-       return error;
-}
-
 MODULE_LICENSE("GPL");