inotify: use GFP_NOFS under potential memory pressure
Eric Paris [Wed, 15 Jul 2009 19:49:52 +0000 (15:49 -0400)]
inotify can have watches removed under filesystem reclaim.

=================================
[ INFO: inconsistent lock state ]
2.6.31-rc2 #16
---------------------------------
inconsistent {IN-RECLAIM_FS-W} -> {RECLAIM_FS-ON-W} usage.
khubd/217 [HC0[0]:SC0[0]:HE1:SE1] takes:
 (iprune_mutex){+.+.?.}, at: [<c10ba899>] invalidate_inodes+0x20/0xe3
{IN-RECLAIM_FS-W} state was registered at:
  [<c10536ab>] __lock_acquire+0x2c9/0xac4
  [<c1053f45>] lock_acquire+0x9f/0xc2
  [<c1308872>] __mutex_lock_common+0x2d/0x323
  [<c1308c00>] mutex_lock_nested+0x2e/0x36
  [<c10ba6ff>] shrink_icache_memory+0x38/0x1b2
  [<c108bfb6>] shrink_slab+0xe2/0x13c
  [<c108c3e1>] kswapd+0x3d1/0x55d
  [<c10449b5>] kthread+0x66/0x6b
  [<c1003fdf>] kernel_thread_helper+0x7/0x10
  [<ffffffff>] 0xffffffff

Two things are needed to fix this.  First we need a method to tell
fsnotify_create_event() to use GFP_NOFS and second we need to stop using
one global IN_IGNORED event and allocate them one at a time.  This solves
current issues with multiple IN_IGNORED on a queue having tail drop
problems and simplifies the allocations since we don't have to worry about
two tasks operating on the IGNORED event concurrently.

Signed-off-by: Eric Paris <eparis@redhat.com>

fs/notify/fsnotify.c
fs/notify/inotify/inotify_user.c
fs/notify/notification.c
include/linux/fsnotify_backend.h

index ec2f7bd..037e878 100644 (file)
@@ -159,7 +159,9 @@ void fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is, const
                        if (!group->ops->should_send_event(group, to_tell, mask))
                                continue;
                        if (!event) {
-                               event = fsnotify_create_event(to_tell, mask, data, data_is, file_name, cookie);
+                               event = fsnotify_create_event(to_tell, mask, data,
+                                                             data_is, file_name, cookie,
+                                                             GFP_KERNEL);
                                /* shit, we OOM'd and now we can't tell, maybe
                                 * someday someone else will want to do something
                                 * here */
index 726118a..f30d9bb 100644 (file)
@@ -57,7 +57,6 @@ int inotify_max_user_watches __read_mostly;
 
 static struct kmem_cache *inotify_inode_mark_cachep __read_mostly;
 struct kmem_cache *event_priv_cachep __read_mostly;
-static struct fsnotify_event *inotify_ignored_event;
 
 /*
  * When inotify registers a new group it increments this and uses that
@@ -384,12 +383,19 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
                                    struct fsnotify_group *group)
 {
        struct inotify_inode_mark_entry *ientry;
+       struct fsnotify_event *ignored_event;
        struct inotify_event_private_data *event_priv;
        struct fsnotify_event_private_data *fsn_event_priv;
 
+       ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL,
+                                             FSNOTIFY_EVENT_NONE, NULL, 0,
+                                             GFP_NOFS);
+       if (!ignored_event)
+               return;
+
        ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
 
-       event_priv = kmem_cache_alloc(event_priv_cachep, GFP_KERNEL);
+       event_priv = kmem_cache_alloc(event_priv_cachep, GFP_NOFS);
        if (unlikely(!event_priv))
                goto skip_send_ignore;
 
@@ -398,7 +404,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
        fsn_event_priv->group = group;
        event_priv->wd = ientry->wd;
 
-       fsnotify_add_notify_event(group, inotify_ignored_event, fsn_event_priv);
+       fsnotify_add_notify_event(group, ignored_event, fsn_event_priv);
 
        /* did the private data get added? */
        if (list_empty(&fsn_event_priv->event_list))
@@ -406,6 +412,9 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry,
 
 skip_send_ignore:
 
+       /* matches the reference taken when the event was created */
+       fsnotify_put_event(ignored_event);
+
        /* remove this entry from the idr */
        inotify_remove_from_idr(group, ientry);
 
@@ -748,9 +757,6 @@ static int __init inotify_user_setup(void)
 
        inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark_entry, SLAB_PANIC);
        event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC);
-       inotify_ignored_event = fsnotify_create_event(NULL, FS_IN_IGNORED, NULL, FSNOTIFY_EVENT_NONE, NULL, 0);
-       if (!inotify_ignored_event)
-               panic("unable to allocate the inotify ignored event\n");
 
        inotify_max_queued_events = 16384;
        inotify_max_user_instances = 128;
index 2b20fea..5213685 100644 (file)
@@ -153,7 +153,7 @@ static bool event_compare(struct fsnotify_event *old, struct fsnotify_event *new
                                return true;
                        break;
                case (FSNOTIFY_EVENT_NONE):
-                       return true;
+                       return false;
                };
        }
        return false;
@@ -345,18 +345,19 @@ static void initialize_event(struct fsnotify_event *event)
  * @name the filename, if available
  */
 struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask, void *data,
-                                            int data_type, const char *name, u32 cookie)
+                                            int data_type, const char *name, u32 cookie,
+                                            gfp_t gfp)
 {
        struct fsnotify_event *event;
 
-       event = kmem_cache_alloc(fsnotify_event_cachep, GFP_KERNEL);
+       event = kmem_cache_alloc(fsnotify_event_cachep, gfp);
        if (!event)
                return NULL;
 
        initialize_event(event);
 
        if (name) {
-               event->file_name = kstrdup(name, GFP_KERNEL);
+               event->file_name = kstrdup(name, gfp);
                if (!event->file_name) {
                        kmem_cache_free(fsnotify_event_cachep, event);
                        return NULL;
index 6c3de99..4d6f47b 100644 (file)
@@ -352,7 +352,7 @@ extern void fsnotify_unmount_inodes(struct list_head *list);
 /* put here because inotify does some weird stuff when destroying watches */
 extern struct fsnotify_event *fsnotify_create_event(struct inode *to_tell, __u32 mask,
                                                    void *data, int data_is, const char *name,
-                                                   u32 cookie);
+                                                   u32 cookie, gfp_t gfp);
 
 #else