ext4: Once a day, printk file system error information to dmesg
Theodore Ts'o [Tue, 27 Jul 2010 15:56:04 +0000 (11:56 -0400)]
This allows us to grab any file system error messages by scraping
/var/log/messages.  This will make it easy for us to do error analysis
across the very large number of machines as we deploy ext4 across the
fleet.

Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>

fs/ext4/ext4.h
fs/ext4/super.c

index 6b96125..5d3d768 100644 (file)
@@ -1166,6 +1166,9 @@ struct ext4_sb_info {
 
        /* workqueue for dio unwritten */
        struct workqueue_struct *dio_unwritten_wq;
+
+       /* timer for periodic error stats printing */
+       struct timer_list s_err_report;
 };
 
 static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
index a94d3f5..ed00c14 100644 (file)
@@ -325,6 +325,12 @@ static void __save_error_info(struct super_block *sb, const char *func,
                es->s_first_error_ino = es->s_last_error_ino;
                es->s_first_error_block = es->s_last_error_block;
        }
+       /*
+        * Start the daily error reporting function if it hasn't been
+        * started already
+        */
+       if (!es->s_error_count)
+               mod_timer(&EXT4_SB(sb)->s_err_report, jiffies + 24*60*60*HZ);
        es->s_error_count = cpu_to_le32(le32_to_cpu(es->s_error_count) + 1);
 }
 
@@ -2480,6 +2486,53 @@ static int ext4_feature_set_ok(struct super_block *sb, int readonly)
        return 1;
 }
 
+/*
+ * Timer callback (arg is the super_block): log the stored error count and
+ * the first/last error records, then re-arm the timer for another 24 hours.
+ */
+static void print_daily_error_info(unsigned long arg)
+{
+       struct super_block *sb = (struct super_block *) arg;
+       struct ext4_sb_info *sbi;
+       struct ext4_super_block *es;
+
+       sbi = EXT4_SB(sb);
+       es = sbi->s_es;
+
+       if (es->s_error_count)
+               ext4_msg(sb, KERN_NOTICE, "error count: %u",
+                        le32_to_cpu(es->s_error_count));
+       if (es->s_first_error_time) {
+               printk(KERN_NOTICE "EXT4-fs (%s): initial error at %u: %.*s:%d",
+                      sb->s_id, le32_to_cpu(es->s_first_error_time),
+                      (int) sizeof(es->s_first_error_func),
+                      es->s_first_error_func,
+                      le32_to_cpu(es->s_first_error_line));
+               if (es->s_first_error_ino)      /* optional fields: same line */
+                       printk(KERN_CONT ": inode %u",
+                              le32_to_cpu(es->s_first_error_ino));
+               if (es->s_first_error_block)
+                       printk(KERN_CONT ": block %llu", (unsigned long long)
+                              le64_to_cpu(es->s_first_error_block));
+               printk(KERN_CONT "\n");  /* finish the line begun above */
+       }
+       if (es->s_last_error_time) {
+               printk(KERN_NOTICE "EXT4-fs (%s): last error at %u: %.*s:%d",
+                      sb->s_id, le32_to_cpu(es->s_last_error_time),
+                      (int) sizeof(es->s_last_error_func),
+                      es->s_last_error_func,
+                      le32_to_cpu(es->s_last_error_line));
+               if (es->s_last_error_ino)       /* optional fields: same line */
+                       printk(KERN_CONT ": inode %u",
+                              le32_to_cpu(es->s_last_error_ino));
+               if (es->s_last_error_block)
+                       printk(KERN_CONT ": block %llu", (unsigned long long)
+                              le64_to_cpu(es->s_last_error_block));
+               printk(KERN_CONT "\n");  /* finish the line begun above */
+       }
+       mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);  /* Once a day */
+}
+
 static int ext4_fill_super(struct super_block *sb, void *data, int silent)
                                __releases(kernel_lock)
                                __acquires(kernel_lock)
@@ -3083,6 +3136,12 @@ no_journal:
        ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. "
                "Opts: %s", descr, orig_data);
 
+       init_timer(&sbi->s_err_report);
+       sbi->s_err_report.function = print_daily_error_info;
+       sbi->s_err_report.data = (unsigned long) sb;
+       if (es->s_error_count)
+               mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
+
        lock_kernel();
        kfree(orig_data);
        return 0;