Blame - fs/ext4/fast_commit.c - linux-5.10

blob: 3ee43fd6d5aa7056e3b74ba0e9e152ad61dc0d53 [file] [log] [blame]

Harshad Shirwadkar	6866d7b	2020-10-15 13:37:55 -0700	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
				2
				3	/*
				4	* fs/ext4/fast_commit.c
				5	*
				6	* Written by Harshad Shirwadkar <harshadshirwadkar@gmail.com>
				7	*
				8	* Ext4 fast commits routines.
				9	*/
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	10	#include "ext4.h"
Harshad Shirwadkar	6866d7b	2020-10-15 13:37:55 -0700	[diff] [blame]	11	#include "ext4_jbd2.h"
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	12	#include "ext4_extents.h"
				13	#include "mballoc.h"
				14
				15	/*
				16	* Ext4 Fast Commits
				17	* -----------------
				18	*
				19	* Ext4 fast commits implement fine grained journalling for Ext4.
				20	*
				21	* Fast commits are organized as a log of tag-length-value (TLV) structs. (See
				22	* struct ext4_fc_tl). Each TLV contains some delta that is replayed TLV by
				23	* TLV during the recovery phase. For the scenarios for which we currently
				24	* don't have replay code, fast commit falls back to full commits.
				25	* Fast commits record delta in one of the following three categories.
				26	*
				27	* (A) Directory entry updates:
				28	*
				29	* - EXT4_FC_TAG_UNLINK - records directory entry unlink
				30	* - EXT4_FC_TAG_LINK - records directory entry link
				31	* - EXT4_FC_TAG_CREAT - records inode and directory entry creation
				32	*
				33	* (B) File specific data range updates:
				34	*
				35	* - EXT4_FC_TAG_ADD_RANGE - records addition of new blocks to an inode
				36	* - EXT4_FC_TAG_DEL_RANGE - records deletion of blocks from an inode
				37	*
				38	* (C) Inode metadata (mtime / ctime etc):
				39	*
				40	* - EXT4_FC_TAG_INODE - record the inode that should be replayed
				41	* during recovery. Note that iblocks field is
				42	* not replayed and instead derived during
				43	* replay.
				44	* Commit Operation
				45	* ----------------
				46	* With fast commits, we maintain all the directory entry operations in the
				47	* order in which they are issued in an in-memory queue. This queue is flushed
				48	* to disk during the commit operation. We also maintain a list of inodes
				49	* that need to be committed during a fast commit in another in memory queue of
				50	* inodes. During the commit operation, we commit in the following order:
				51	*
				52	* [1] Lock inodes for any further data updates by setting COMMITTING state
				53	* [2] Submit data buffers of all the inodes
				54	* [3] Wait for [2] to complete
				55	* [4] Commit all the directory entry updates in the fast commit space
				56	* [5] Commit all the changed inode structures
				57	* [6] Write tail tag (this tag ensures the atomicity, please read the following
				58	* section for more details).
				59	* [7] Wait for [4], [5] and [6] to complete.
				60	*
				61	* All the inode updates must call ext4_fc_start_update() before starting an
				62	* update. If such an ongoing update is present, fast commit waits for it to
				63	* complete. The completion of such an update is marked by
				64	* ext4_fc_stop_update().
				65	*
				66	* Fast Commit Ineligibility
				67	* -------------------------
				68	* Not all operations are supported by fast commits today (e.g extended
				69	* attributes). Fast commit ineligibility is marked by calling one of the
				70	* two following functions:
				71	*
				72	* - ext4_fc_mark_ineligible(): This makes next fast commit operation to fall
				73	* back to full commit. This is useful in case of transient errors.
				74	*
				75	* - ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() - This makes all
				76	* the fast commits happening between ext4_fc_start_ineligible() and
				77	* ext4_fc_stop_ineligible() and one fast commit after the call to
				78	* ext4_fc_stop_ineligible() to fall back to full commits. It is important to
				79	* make one more fast commit to fall back to full commit after stop call so
				80	* that it is guaranteed that the fast commit ineligible operation contained
				81	* within ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() is
				82	* followed by at least 1 full commit.
				83	*
				84	* Atomicity of commits
				85	* --------------------
				86	* In order to guarantee atomicity during the commit operation, fast commit
				87	* uses "EXT4_FC_TAG_TAIL" tag that marks a fast commit as complete. Tail
				88	* tag contains CRC of the contents and TID of the transaction after which
				89	* this fast commit should be applied. Recovery code replays fast commit
				90	* logs only if there's at least 1 valid tail present. For every fast commit
				91	* operation, there is 1 tail. This means, we may end up with multiple tails
				92	* in the fast commit space. Here's an example:
				93	*
				94	* - Create a new file A and remove existing file B
				95	* - fsync()
				96	* - Append contents to file A
				97	* - Truncate file A
				98	* - fsync()
				99	*
				100	* The fast commit space at the end of above operations would look like this:
				101	* [HEAD] [CREAT A] [UNLINK B] [TAIL] [ADD_RANGE A] [DEL_RANGE A] [TAIL]
				102	* |<--- Fast Commit 1 --->|<--- Fast Commit 2 ---->|
				103	*
				104	* Replay code should thus check for all the valid tails in the FC area.
				105	*
				106	* TODOs
				107	* -----
				108	* 1) Make fast commit atomic updates more fine grained. Today, a fast commit
				109	* eligible update must be protected within ext4_fc_start_update() and
				110	* ext4_fc_stop_update(). These routines are called at much higher
				111	* routines. This can be made more fine grained by combining with
				112	* ext4_journal_start().
				113	*
				114	* 2) Same above for ext4_fc_start_ineligible() and ext4_fc_stop_ineligible()
				115	*
				116	* 3) Handle more ineligible cases.
				117	*/
				118
				119	#include <trace/events/ext4.h>
				120	static struct kmem_cache *ext4_fc_dentry_cachep;
				121
				122	static void ext4_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
				123	{
				124	BUFFER_TRACE(bh, "");
				125	if (uptodate) {
				126	ext4_debug("%s: Block %lld up-to-date",
				127	__func__, bh->b_blocknr);
				128	set_buffer_uptodate(bh);
				129	} else {
				130	ext4_debug("%s: Block %lld not up-to-date",
				131	__func__, bh->b_blocknr);
				132	clear_buffer_uptodate(bh);
				133	}
				134
				135	unlock_buffer(bh);
				136	}
				137
				138	static inline void ext4_fc_reset_inode(struct inode *inode)
				139	{
				140	struct ext4_inode_info *ei = EXT4_I(inode);
				141
				142	ei->i_fc_lblk_start = 0;
				143	ei->i_fc_lblk_len = 0;
				144	}
				145
				146	void ext4_fc_init_inode(struct inode *inode)
				147	{
				148	struct ext4_inode_info *ei = EXT4_I(inode);
				149
				150	ext4_fc_reset_inode(inode);
				151	ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING);
				152	INIT_LIST_HEAD(&ei->i_fc_list);
				153	init_waitqueue_head(&ei->i_fc_wait);
				154	atomic_set(&ei->i_fc_updates, 0);
				155	ei->i_fc_committed_subtid = 0;
				156	}
				157
				158	/*
				159	* Inform Ext4's fast about start of an inode update
				160	*
				161	* This function is called by the high level call VFS callbacks before
				162	* performing any inode update. This function blocks if there's an ongoing
				163	* fast commit on the inode in question.
				164	*/
				165	void ext4_fc_start_update(struct inode *inode)
				166	{
				167	struct ext4_inode_info *ei = EXT4_I(inode);
				168
Harshad Shirwadkar	8016e29	2020-10-15 13:37:59 -0700	[diff] [blame]	169	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) \|\|
				170	(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	171	return;
				172
				173	restart:
				174	spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
				175	if (list_empty(&ei->i_fc_list))
				176	goto out;
				177
				178	if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
				179	wait_queue_head_t *wq;
				180	#if (BITS_PER_LONG < 64)
				181	DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
				182	EXT4_STATE_FC_COMMITTING);
				183	wq = bit_waitqueue(&ei->i_state_flags,
				184	EXT4_STATE_FC_COMMITTING);
				185	#else
				186	DEFINE_WAIT_BIT(wait, &ei->i_flags,
				187	EXT4_STATE_FC_COMMITTING);
				188	wq = bit_waitqueue(&ei->i_flags,
				189	EXT4_STATE_FC_COMMITTING);
				190	#endif
				191	prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
				192	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
				193	schedule();
				194	finish_wait(wq, &wait.wq_entry);
				195	goto restart;
				196	}
				197	out:
				198	atomic_inc(&ei->i_fc_updates);
				199	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
				200	}
				201
				202	/*
				203	* Stop inode update and wake up waiting fast commits if any.
				204	*/
				205	void ext4_fc_stop_update(struct inode *inode)
				206	{
				207	struct ext4_inode_info *ei = EXT4_I(inode);
				208
Harshad Shirwadkar	8016e29	2020-10-15 13:37:59 -0700	[diff] [blame]	209	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) \|\|
				210	(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	211	return;
				212
				213	if (atomic_dec_and_test(&ei->i_fc_updates))
				214	wake_up_all(&ei->i_fc_wait);
				215	}
				216
				217	/*
				218	* Remove inode from fast commit list. If the inode is being committed
				219	* we wait until inode commit is done.
				220	*/
				221	void ext4_fc_del(struct inode *inode)
				222	{
				223	struct ext4_inode_info *ei = EXT4_I(inode);
				224
Harshad Shirwadkar	8016e29	2020-10-15 13:37:59 -0700	[diff] [blame]	225	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) \|\|
				226	(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	227	return;
				228
				229	restart:
				230	spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
				231	if (list_empty(&ei->i_fc_list)) {
				232	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
				233	return;
				234	}
				235
				236	if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
				237	wait_queue_head_t *wq;
				238	#if (BITS_PER_LONG < 64)
				239	DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
				240	EXT4_STATE_FC_COMMITTING);
				241	wq = bit_waitqueue(&ei->i_state_flags,
				242	EXT4_STATE_FC_COMMITTING);
				243	#else
				244	DEFINE_WAIT_BIT(wait, &ei->i_flags,
				245	EXT4_STATE_FC_COMMITTING);
				246	wq = bit_waitqueue(&ei->i_flags,
				247	EXT4_STATE_FC_COMMITTING);
				248	#endif
				249	prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
				250	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
				251	schedule();
				252	finish_wait(wq, &wait.wq_entry);
				253	goto restart;
				254	}
				255	if (!list_empty(&ei->i_fc_list))
				256	list_del_init(&ei->i_fc_list);
				257	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
				258	}
				259
				260	/*
				261	* Mark file system as fast commit ineligible. This means that next commit
				262	* operation would result in a full jbd2 commit.
				263	*/
				264	void ext4_fc_mark_ineligible(struct super_block *sb, int reason)
				265	{
				266	struct ext4_sb_info *sbi = EXT4_SB(sb);
				267
Harshad Shirwadkar	8016e29	2020-10-15 13:37:59 -0700	[diff] [blame]	268	if (!test_opt2(sb, JOURNAL_FAST_COMMIT) \|\|
				269	(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
				270	return;
				271
Harshad Shirwadkar	ababea7	2020-10-26 21:49:15 -0700	[diff] [blame]	272	sbi->s_mount_flags \|= EXT4_MF_FC_INELIGIBLE;
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	273	WARN_ON(reason >= EXT4_FC_REASON_MAX);
				274	sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
				275	}
				276
				277	/*
				278	* Start a fast commit ineligible update. Any commits that happen while
				279	* such an operation is in progress fall back to full commits.
				280	*/
				281	void ext4_fc_start_ineligible(struct super_block *sb, int reason)
				282	{
				283	struct ext4_sb_info *sbi = EXT4_SB(sb);
				284
Harshad Shirwadkar	8016e29	2020-10-15 13:37:59 -0700	[diff] [blame]	285	if (!test_opt2(sb, JOURNAL_FAST_COMMIT) \|\|
				286	(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
				287	return;
				288
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	289	WARN_ON(reason >= EXT4_FC_REASON_MAX);
				290	sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
				291	atomic_inc(&sbi->s_fc_ineligible_updates);
				292	}
				293
				294	/*
Harshad Shirwadkar	ababea7	2020-10-26 21:49:15 -0700	[diff] [blame]	295	* Stop a fast commit ineligible update. We set EXT4_MF_FC_INELIGIBLE flag here
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	296	* to ensure that after stopping the ineligible update, at least one full
				297	* commit takes place.
				298	*/
				299	void ext4_fc_stop_ineligible(struct super_block *sb)
				300	{
Harshad Shirwadkar	8016e29	2020-10-15 13:37:59 -0700	[diff] [blame]	301	if (!test_opt2(sb, JOURNAL_FAST_COMMIT) \|\|
				302	(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
				303	return;
				304
Harshad Shirwadkar	ababea7	2020-10-26 21:49:15 -0700	[diff] [blame]	305	EXT4_SB(sb)->s_mount_flags \|= EXT4_MF_FC_INELIGIBLE;
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	306	atomic_dec(&EXT4_SB(sb)->s_fc_ineligible_updates);
				307	}
				308
				309	static inline int ext4_fc_is_ineligible(struct super_block *sb)
				310	{
Harshad Shirwadkar	ababea7	2020-10-26 21:49:15 -0700	[diff] [blame]	311	return (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FC_INELIGIBLE) \|\|
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	312	atomic_read(&EXT4_SB(sb)->s_fc_ineligible_updates);
				313	}
				314
				315	/*
				316	* Generic fast commit tracking function. If this is the first time this we are
				317	* called after a full commit, we initialize fast commit fields and then call
				318	* __fc_track_fn() with update = 0. If we have already been called after a full
				319	* commit, we pass update = 1. Based on that, the track function can determine
				320	* if it needs to track a field for the first time or if it needs to just
				321	* update the previously tracked value.
				322	*
				323	* If enqueue is set, this function enqueues the inode in fast commit list.
				324	*/
				325	static int ext4_fc_track_template(
				326	struct inode inode, int (__fc_track_fn)(struct inode , void , bool),
				327	void *args, int enqueue)
				328	{
				329	tid_t running_txn_tid;
				330	bool update = false;
				331	struct ext4_inode_info *ei = EXT4_I(inode);
				332	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
				333	int ret;
				334
Harshad Shirwadkar	8016e29	2020-10-15 13:37:59 -0700	[diff] [blame]	335	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) \|\|
				336	(sbi->s_mount_state & EXT4_FC_REPLAY))
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	337	return -EOPNOTSUPP;
				338
				339	if (ext4_fc_is_ineligible(inode->i_sb))
				340	return -EINVAL;
				341
				342	running_txn_tid = sbi->s_journal ?
				343	sbi->s_journal->j_commit_sequence + 1 : 0;
				344
				345	mutex_lock(&ei->i_fc_lock);
				346	if (running_txn_tid == ei->i_sync_tid) {
				347	update = true;
				348	} else {
				349	ext4_fc_reset_inode(inode);
				350	ei->i_sync_tid = running_txn_tid;
				351	}
				352	ret = __fc_track_fn(inode, args, update);
				353	mutex_unlock(&ei->i_fc_lock);
				354
				355	if (!enqueue)
				356	return ret;
				357
				358	spin_lock(&sbi->s_fc_lock);
				359	if (list_empty(&EXT4_I(inode)->i_fc_list))
				360	list_add_tail(&EXT4_I(inode)->i_fc_list,
Harshad Shirwadkar	ababea7	2020-10-26 21:49:15 -0700	[diff] [blame]	361	(sbi->s_mount_flags & EXT4_MF_FC_COMMITTING) ?
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	362	&sbi->s_fc_q[FC_Q_STAGING] :
				363	&sbi->s_fc_q[FC_Q_MAIN]);
				364	spin_unlock(&sbi->s_fc_lock);
				365
				366	return ret;
				367	}
				368
				/* Arguments passed to __track_dentry_update() through its void *arg slot. */
				struct __track_dentry_update_args {
					struct dentry *dentry;	/* directory entry the operation applies to */
					int op;			/* EXT4_FC_TAG_* value recorded as fcd_op */
				};
				373
				374	/* __track_fn for directory entry updates. Called with ei->i_fc_lock. */
				375	static int __track_dentry_update(struct inode inode, void arg, bool update)
				376	{
				377	struct ext4_fc_dentry_update *node;
				378	struct ext4_inode_info *ei = EXT4_I(inode);
				379	struct __track_dentry_update_args *dentry_update =
				380	(struct __track_dentry_update_args *)arg;
				381	struct dentry *dentry = dentry_update->dentry;
				382	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
				383
				384	mutex_unlock(&ei->i_fc_lock);
				385	node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
				386	if (!node) {
				387	ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_MEM);
				388	mutex_lock(&ei->i_fc_lock);
				389	return -ENOMEM;
				390	}
				391
				392	node->fcd_op = dentry_update->op;
				393	node->fcd_parent = dentry->d_parent->d_inode->i_ino;
				394	node->fcd_ino = inode->i_ino;
				395	if (dentry->d_name.len > DNAME_INLINE_LEN) {
				396	node->fcd_name.name = kmalloc(dentry->d_name.len, GFP_NOFS);
				397	if (!node->fcd_name.name) {
				398	kmem_cache_free(ext4_fc_dentry_cachep, node);
				399	ext4_fc_mark_ineligible(inode->i_sb,
				400	EXT4_FC_REASON_MEM);
				401	mutex_lock(&ei->i_fc_lock);
				402	return -ENOMEM;
				403	}
				404	memcpy((u8 *)node->fcd_name.name, dentry->d_name.name,
				405	dentry->d_name.len);
				406	} else {
				407	memcpy(node->fcd_iname, dentry->d_name.name,
				408	dentry->d_name.len);
				409	node->fcd_name.name = node->fcd_iname;
				410	}
				411	node->fcd_name.len = dentry->d_name.len;
				412
				413	spin_lock(&sbi->s_fc_lock);
Harshad Shirwadkar	ababea7	2020-10-26 21:49:15 -0700	[diff] [blame]	414	if (sbi->s_mount_flags & EXT4_MF_FC_COMMITTING)
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	415	list_add_tail(&node->fcd_list,
				416	&sbi->s_fc_dentry_q[FC_Q_STAGING]);
				417	else
				418	list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_MAIN]);
				419	spin_unlock(&sbi->s_fc_lock);
				420	mutex_lock(&ei->i_fc_lock);
				421
				422	return 0;
				423	}
				424
				425	void ext4_fc_track_unlink(struct inode inode, struct dentry dentry)
				426	{
				427	struct __track_dentry_update_args args;
				428	int ret;
				429
				430	args.dentry = dentry;
				431	args.op = EXT4_FC_TAG_UNLINK;
				432
				433	ret = ext4_fc_track_template(inode, __track_dentry_update,
				434	(void *)&args, 0);
				435	trace_ext4_fc_track_unlink(inode, dentry, ret);
				436	}
				437
				438	void ext4_fc_track_link(struct inode inode, struct dentry dentry)
				439	{
				440	struct __track_dentry_update_args args;
				441	int ret;
				442
				443	args.dentry = dentry;
				444	args.op = EXT4_FC_TAG_LINK;
				445
				446	ret = ext4_fc_track_template(inode, __track_dentry_update,
				447	(void *)&args, 0);
				448	trace_ext4_fc_track_link(inode, dentry, ret);
				449	}
				450
				451	void ext4_fc_track_create(struct inode inode, struct dentry dentry)
				452	{
				453	struct __track_dentry_update_args args;
				454	int ret;
				455
				456	args.dentry = dentry;
				457	args.op = EXT4_FC_TAG_CREAT;
				458
				459	ret = ext4_fc_track_template(inode, __track_dentry_update,
				460	(void *)&args, 0);
				461	trace_ext4_fc_track_create(inode, dentry, ret);
				462	}
				463
				464	/* __track_fn for inode tracking */
				465	static int __track_inode(struct inode inode, void arg, bool update)
				466	{
				467	if (update)
				468	return -EEXIST;
				469
				470	EXT4_I(inode)->i_fc_lblk_len = 0;
				471
				472	return 0;
				473	}
				474
				475	void ext4_fc_track_inode(struct inode *inode)
				476	{
				477	int ret;
				478
				479	if (S_ISDIR(inode->i_mode))
				480	return;
				481
				482	ret = ext4_fc_track_template(inode, __track_inode, NULL, 1);
				483	trace_ext4_fc_track_inode(inode, ret);
				484	}
				485
				486	struct __track_range_args {
				487	ext4_lblk_t start, end;
				488	};
				489
				490	/* __track_fn for tracking data updates */
				491	static int __track_range(struct inode inode, void arg, bool update)
				492	{
				493	struct ext4_inode_info *ei = EXT4_I(inode);
				494	ext4_lblk_t oldstart;
				495	struct __track_range_args *__arg =
				496	(struct __track_range_args *)arg;
				497
				498	if (inode->i_ino < EXT4_FIRST_INO(inode->i_sb)) {
				499	ext4_debug("Special inode %ld being modified\n", inode->i_ino);
				500	return -ECANCELED;
				501	}
				502
				503	oldstart = ei->i_fc_lblk_start;
				504
				505	if (update && ei->i_fc_lblk_len > 0) {
				506	ei->i_fc_lblk_start = min(ei->i_fc_lblk_start, __arg->start);
				507	ei->i_fc_lblk_len =
				508	max(oldstart + ei->i_fc_lblk_len - 1, __arg->end) -
				509	ei->i_fc_lblk_start + 1;
				510	} else {
				511	ei->i_fc_lblk_start = __arg->start;
				512	ei->i_fc_lblk_len = __arg->end - __arg->start + 1;
				513	}
				514
				515	return 0;
				516	}
				517
				518	void ext4_fc_track_range(struct inode *inode, ext4_lblk_t start,
				519	ext4_lblk_t end)
				520	{
				521	struct __track_range_args args;
				522	int ret;
				523
				524	if (S_ISDIR(inode->i_mode))
				525	return;
				526
				527	args.start = start;
				528	args.end = end;
				529
				530	ret = ext4_fc_track_template(inode, __track_range, &args, 1);
				531
				532	trace_ext4_fc_track_range(inode, start, end, ret);
				533	}
				534
				535	static void ext4_fc_submit_bh(struct super_block *sb)
				536	{
				537	int write_flags = REQ_SYNC;
				538	struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh;
				539
				540	if (test_opt(sb, BARRIER))
				541	write_flags \|= REQ_FUA \| REQ_PREFLUSH;
				542	lock_buffer(bh);
				543	clear_buffer_dirty(bh);
				544	set_buffer_uptodate(bh);
				545	bh->b_end_io = ext4_end_buffer_io_sync;
				546	submit_bh(REQ_OP_WRITE, write_flags, bh);
				547	EXT4_SB(sb)->s_fc_bh = NULL;
				548	}
				549
				550	/* Ext4 commit path routines */
				551
				552	/* memzero and update CRC */
				553	static void ext4_fc_memzero(struct super_block sb, void *dst, int len,
				554	u32 *crc)
				555	{
				556	void *ret;
				557
				558	ret = memset(dst, 0, len);
				559	if (crc)
				560	crc = ext4_chksum(EXT4_SB(sb), crc, dst, len);
				561	return ret;
				562	}
				563
				564	/*
				565	* Allocate len bytes on a fast commit buffer.
				566	*
				567	* During the commit time this function is used to manage fast commit
				568	* block space. We don't split a fast commit log onto different
				569	* blocks. So this function makes sure that if there's not enough space
				570	* on the current block, the remaining space in the current block is
				571	* marked as unused by adding EXT4_FC_TAG_PAD tag. In that case,
				572	* new block is from jbd2 and CRC is updated to reflect the padding
				573	* we added.
				574	*/
				575	static u8 ext4_fc_reserve_space(struct super_block sb, int len, u32 *crc)
				576	{
				577	struct ext4_fc_tl *tl;
				578	struct ext4_sb_info *sbi = EXT4_SB(sb);
				579	struct buffer_head *bh;
				580	int bsize = sbi->s_journal->j_blocksize;
				581	int ret, off = sbi->s_fc_bytes % bsize;
				582	int pad_len;
				583
				584	/*
				585	* After allocating len, we should have space at least for a 0 byte
				586	* padding.
				587	*/
				588	if (len + sizeof(struct ext4_fc_tl) > bsize)
				589	return NULL;
				590
				591	if (bsize - off - 1 > len + sizeof(struct ext4_fc_tl)) {
				592	/*
				593	* Only allocate from current buffer if we have enough space for
				594	* this request AND we have space to add a zero byte padding.
				595	*/
				596	if (!sbi->s_fc_bh) {
				597	ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
				598	if (ret)
				599	return NULL;
				600	sbi->s_fc_bh = bh;
				601	}
				602	sbi->s_fc_bytes += len;
				603	return sbi->s_fc_bh->b_data + off;
				604	}
				605	/* Need to add PAD tag */
				606	tl = (struct ext4_fc_tl *)(sbi->s_fc_bh->b_data + off);
				607	tl->fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD);
				608	pad_len = bsize - off - 1 - sizeof(struct ext4_fc_tl);
				609	tl->fc_len = cpu_to_le16(pad_len);
				610	if (crc)
				611	crc = ext4_chksum(sbi, crc, tl, sizeof(*tl));
				612	if (pad_len > 0)
				613	ext4_fc_memzero(sb, tl + 1, pad_len, crc);
				614	ext4_fc_submit_bh(sb);
				615
				616	ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
				617	if (ret)
				618	return NULL;
				619	sbi->s_fc_bh = bh;
				620	sbi->s_fc_bytes = (sbi->s_fc_bytes / bsize + 1) * bsize + len;
				621	return sbi->s_fc_bh->b_data;
				622	}
				623
				624	/* memcpy to fc reserved space and update CRC */
				625	static void ext4_fc_memcpy(struct super_block sb, void dst, const void src,
				626	int len, u32 *crc)
				627	{
				628	if (crc)
				629	crc = ext4_chksum(EXT4_SB(sb), crc, src, len);
				630	return memcpy(dst, src, len);
				631	}
				632
				633	/*
				634	* Complete a fast commit by writing tail tag.
				635	*
				636	* Writing tail tag marks the end of a fast commit. In order to guarantee
				637	* atomicity, after writing tail tag, even if there's space remaining
				638	* in the block, next commit shouldn't use it. That's why tail tag
				639	* has the length as that of the remaining space on the block.
				640	*/
				641	static int ext4_fc_write_tail(struct super_block *sb, u32 crc)
				642	{
				643	struct ext4_sb_info *sbi = EXT4_SB(sb);
				644	struct ext4_fc_tl tl;
				645	struct ext4_fc_tail tail;
				646	int off, bsize = sbi->s_journal->j_blocksize;
				647	u8 *dst;
				648
				649	/*
				650	* ext4_fc_reserve_space takes care of allocating an extra block if
				651	* there's no enough space on this block for accommodating this tail.
				652	*/
				653	dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(tail), &crc);
				654	if (!dst)
				655	return -ENOSPC;
				656
				657	off = sbi->s_fc_bytes % bsize;
				658
				659	tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_TAIL);
				660	tl.fc_len = cpu_to_le16(bsize - off - 1 + sizeof(struct ext4_fc_tail));
				661	sbi->s_fc_bytes = round_up(sbi->s_fc_bytes, bsize);
				662
				663	ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), &crc);
				664	dst += sizeof(tl);
				665	tail.fc_tid = cpu_to_le32(sbi->s_journal->j_running_transaction->t_tid);
				666	ext4_fc_memcpy(sb, dst, &tail.fc_tid, sizeof(tail.fc_tid), &crc);
				667	dst += sizeof(tail.fc_tid);
				668	tail.fc_crc = cpu_to_le32(crc);
				669	ext4_fc_memcpy(sb, dst, &tail.fc_crc, sizeof(tail.fc_crc), NULL);
				670
				671	ext4_fc_submit_bh(sb);
				672
				673	return 0;
				674	}
				675
				676	/*
				677	* Adds tag, length, value and updates CRC. Returns true if tlv was added.
				678	* Returns false if there's not enough space.
				679	*/
				680	static bool ext4_fc_add_tlv(struct super_block sb, u16 tag, u16 len, u8 val,
				681	u32 *crc)
				682	{
				683	struct ext4_fc_tl tl;
				684	u8 *dst;
				685
				686	dst = ext4_fc_reserve_space(sb, sizeof(tl) + len, crc);
				687	if (!dst)
				688	return false;
				689
				690	tl.fc_tag = cpu_to_le16(tag);
				691	tl.fc_len = cpu_to_le16(len);
				692
				693	ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc);
				694	ext4_fc_memcpy(sb, dst + sizeof(tl), val, len, crc);
				695
				696	return true;
				697	}
				698
				699	/* Same as above, but adds dentry tlv. */
				700	static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u16 tag,
				701	int parent_ino, int ino, int dlen,
				702	const unsigned char *dname,
				703	u32 *crc)
				704	{
				705	struct ext4_fc_dentry_info fcd;
				706	struct ext4_fc_tl tl;
				707	u8 *dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(fcd) + dlen,
				708	crc);
				709
				710	if (!dst)
				711	return false;
				712
				713	fcd.fc_parent_ino = cpu_to_le32(parent_ino);
				714	fcd.fc_ino = cpu_to_le32(ino);
				715	tl.fc_tag = cpu_to_le16(tag);
				716	tl.fc_len = cpu_to_le16(sizeof(fcd) + dlen);
				717	ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc);
				718	dst += sizeof(tl);
				719	ext4_fc_memcpy(sb, dst, &fcd, sizeof(fcd), crc);
				720	dst += sizeof(fcd);
				721	ext4_fc_memcpy(sb, dst, dname, dlen, crc);
				722	dst += dlen;
				723
				724	return true;
				725	}
				726
				727	/*
				728	* Writes inode in the fast commit space under TLV with tag @tag.
				729	* Returns 0 on success, error on failure.
				730	*/
				731	static int ext4_fc_write_inode(struct inode inode, u32 crc)
				732	{
				733	struct ext4_inode_info *ei = EXT4_I(inode);
				734	int inode_len = EXT4_GOOD_OLD_INODE_SIZE;
				735	int ret;
				736	struct ext4_iloc iloc;
				737	struct ext4_fc_inode fc_inode;
				738	struct ext4_fc_tl tl;
				739	u8 *dst;
				740
				741	ret = ext4_get_inode_loc(inode, &iloc);
				742	if (ret)
				743	return ret;
				744
				745	if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE)
				746	inode_len += ei->i_extra_isize;
				747
				748	fc_inode.fc_ino = cpu_to_le32(inode->i_ino);
				749	tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_INODE);
				750	tl.fc_len = cpu_to_le16(inode_len + sizeof(fc_inode.fc_ino));
				751
				752	dst = ext4_fc_reserve_space(inode->i_sb,
				753	sizeof(tl) + inode_len + sizeof(fc_inode.fc_ino), crc);
				754	if (!dst)
				755	return -ECANCELED;
				756
				757	if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, sizeof(tl), crc))
				758	return -ECANCELED;
				759	dst += sizeof(tl);
				760	if (!ext4_fc_memcpy(inode->i_sb, dst, &fc_inode, sizeof(fc_inode), crc))
				761	return -ECANCELED;
				762	dst += sizeof(fc_inode);
				763	if (!ext4_fc_memcpy(inode->i_sb, dst, (u8 *)ext4_raw_inode(&iloc),
				764	inode_len, crc))
				765	return -ECANCELED;
				766
				767	return 0;
				768	}
				769
				770	/*
				771	* Writes updated data ranges for the inode in question. Updates CRC.
				772	* Returns 0 on success, error otherwise.
				773	*/
				774	static int ext4_fc_write_inode_data(struct inode inode, u32 crc)
				775	{
				776	ext4_lblk_t old_blk_size, cur_lblk_off, new_blk_size;
				777	struct ext4_inode_info *ei = EXT4_I(inode);
				778	struct ext4_map_blocks map;
				779	struct ext4_fc_add_range fc_ext;
				780	struct ext4_fc_del_range lrange;
				781	struct ext4_extent *ex;
				782	int ret;
				783
				784	mutex_lock(&ei->i_fc_lock);
				785	if (ei->i_fc_lblk_len == 0) {
				786	mutex_unlock(&ei->i_fc_lock);
				787	return 0;
				788	}
				789	old_blk_size = ei->i_fc_lblk_start;
				790	new_blk_size = ei->i_fc_lblk_start + ei->i_fc_lblk_len - 1;
				791	ei->i_fc_lblk_len = 0;
				792	mutex_unlock(&ei->i_fc_lock);
				793
				794	cur_lblk_off = old_blk_size;
				795	jbd_debug(1, "%s: will try writing %d to %d for inode %ld\n",
				796	__func__, cur_lblk_off, new_blk_size, inode->i_ino);
				797
				798	while (cur_lblk_off <= new_blk_size) {
				799	map.m_lblk = cur_lblk_off;
				800	map.m_len = new_blk_size - cur_lblk_off + 1;
				801	ret = ext4_map_blocks(NULL, inode, &map, 0);
				802	if (ret < 0)
				803	return -ECANCELED;
				804
				805	if (map.m_len == 0) {
				806	cur_lblk_off++;
				807	continue;
				808	}
				809
				810	if (ret == 0) {
				811	lrange.fc_ino = cpu_to_le32(inode->i_ino);
				812	lrange.fc_lblk = cpu_to_le32(map.m_lblk);
				813	lrange.fc_len = cpu_to_le32(map.m_len);
				814	if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_DEL_RANGE,
				815	sizeof(lrange), (u8 *)&lrange, crc))
				816	return -ENOSPC;
				817	} else {
				818	fc_ext.fc_ino = cpu_to_le32(inode->i_ino);
				819	ex = (struct ext4_extent *)&fc_ext.fc_ex;
				820	ex->ee_block = cpu_to_le32(map.m_lblk);
				821	ex->ee_len = cpu_to_le16(map.m_len);
				822	ext4_ext_store_pblock(ex, map.m_pblk);
				823	if (map.m_flags & EXT4_MAP_UNWRITTEN)
				824	ext4_ext_mark_unwritten(ex);
				825	else
				826	ext4_ext_mark_initialized(ex);
				827	if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_ADD_RANGE,
				828	sizeof(fc_ext), (u8 *)&fc_ext, crc))
				829	return -ENOSPC;
				830	}
				831
				832	cur_lblk_off += map.m_len;
				833	}
				834
				835	return 0;
				836	}
				837
				838
				839	/* Submit data for all the fast commit inodes */
				840	static int ext4_fc_submit_inode_data_all(journal_t *journal)
				841	{
				842	struct super_block sb = (struct super_block )(journal->j_private);
				843	struct ext4_sb_info *sbi = EXT4_SB(sb);
				844	struct ext4_inode_info *ei;
				845	struct list_head *pos;
				846	int ret = 0;
				847
				848	spin_lock(&sbi->s_fc_lock);
Harshad Shirwadkar	ababea7	2020-10-26 21:49:15 -0700	[diff] [blame]	849	sbi->s_mount_flags \|= EXT4_MF_FC_COMMITTING;
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	850	list_for_each(pos, &sbi->s_fc_q[FC_Q_MAIN]) {
				851	ei = list_entry(pos, struct ext4_inode_info, i_fc_list);
				852	ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING);
				853	while (atomic_read(&ei->i_fc_updates)) {
				854	DEFINE_WAIT(wait);
				855
				856	prepare_to_wait(&ei->i_fc_wait, &wait,
				857	TASK_UNINTERRUPTIBLE);
				858	if (atomic_read(&ei->i_fc_updates)) {
				859	spin_unlock(&sbi->s_fc_lock);
				860	schedule();
				861	spin_lock(&sbi->s_fc_lock);
				862	}
				863	finish_wait(&ei->i_fc_wait, &wait);
				864	}
				865	spin_unlock(&sbi->s_fc_lock);
				866	ret = jbd2_submit_inode_data(ei->jinode);
				867	if (ret)
				868	return ret;
				869	spin_lock(&sbi->s_fc_lock);
				870	}
				871	spin_unlock(&sbi->s_fc_lock);
				872
				873	return ret;
				874	}
				875
				876	/* Wait for completion of data for all the fast commit inodes */
				877	static int ext4_fc_wait_inode_data_all(journal_t *journal)
				878	{
				879	struct super_block sb = (struct super_block )(journal->j_private);
				880	struct ext4_sb_info *sbi = EXT4_SB(sb);
				881	struct ext4_inode_info pos, n;
				882	int ret = 0;
				883
				884	spin_lock(&sbi->s_fc_lock);
				885	list_for_each_entry_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
				886	if (!ext4_test_inode_state(&pos->vfs_inode,
				887	EXT4_STATE_FC_COMMITTING))
				888	continue;
				889	spin_unlock(&sbi->s_fc_lock);
				890
				891	ret = jbd2_wait_inode_data(journal, pos->jinode);
				892	if (ret)
				893	return ret;
				894	spin_lock(&sbi->s_fc_lock);
				895	}
				896	spin_unlock(&sbi->s_fc_lock);
				897
				898	return 0;
				899	}
				900
				901	/* Commit all the directory entry updates */
				902	static int ext4_fc_commit_dentry_updates(journal_t journal, u32 crc)
				903	{
				904	struct super_block sb = (struct super_block )(journal->j_private);
				905	struct ext4_sb_info *sbi = EXT4_SB(sb);
				906	struct ext4_fc_dentry_update *fc_dentry;
				907	struct inode *inode;
				908	struct list_head pos, n, fcd_pos, fcd_n;
				909	struct ext4_inode_info *ei;
				910	int ret;
				911
				912	if (list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN]))
				913	return 0;
				914	list_for_each_safe(fcd_pos, fcd_n, &sbi->s_fc_dentry_q[FC_Q_MAIN]) {
				915	fc_dentry = list_entry(fcd_pos, struct ext4_fc_dentry_update,
				916	fcd_list);
				917	if (fc_dentry->fcd_op != EXT4_FC_TAG_CREAT) {
				918	spin_unlock(&sbi->s_fc_lock);
				919	if (!ext4_fc_add_dentry_tlv(
				920	sb, fc_dentry->fcd_op,
				921	fc_dentry->fcd_parent, fc_dentry->fcd_ino,
				922	fc_dentry->fcd_name.len,
				923	fc_dentry->fcd_name.name, crc)) {
				924	ret = -ENOSPC;
				925	goto lock_and_exit;
				926	}
				927	spin_lock(&sbi->s_fc_lock);
				928	continue;
				929	}
				930
				931	inode = NULL;
				932	list_for_each_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN]) {
				933	ei = list_entry(pos, struct ext4_inode_info, i_fc_list);
				934	if (ei->vfs_inode.i_ino == fc_dentry->fcd_ino) {
				935	inode = &ei->vfs_inode;
				936	break;
				937	}
				938	}
				939	/*
				940	* If we don't find inode in our list, then it was deleted,
				941	* in which case, we don't need to record it's create tag.
				942	*/
				943	if (!inode)
				944	continue;
				945	spin_unlock(&sbi->s_fc_lock);
				946
				947	/*
				948	* We first write the inode and then the create dirent. This
				949	* allows the recovery code to create an unnamed inode first
				950	* and then link it to a directory entry. This allows us
				951	* to use namei.c routines almost as is and simplifies
				952	* the recovery code.
				953	*/
				954	ret = ext4_fc_write_inode(inode, crc);
				955	if (ret)
				956	goto lock_and_exit;
				957
				958	ret = ext4_fc_write_inode_data(inode, crc);
				959	if (ret)
				960	goto lock_and_exit;
				961
				962	if (!ext4_fc_add_dentry_tlv(
				963	sb, fc_dentry->fcd_op,
				964	fc_dentry->fcd_parent, fc_dentry->fcd_ino,
				965	fc_dentry->fcd_name.len,
				966	fc_dentry->fcd_name.name, crc)) {
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	967	ret = -ENOSPC;
				968	goto lock_and_exit;
				969	}
				970
				971	spin_lock(&sbi->s_fc_lock);
				972	}
				973	return 0;
				974	lock_and_exit:
				975	spin_lock(&sbi->s_fc_lock);
				976	return ret;
				977	}
				978
				979	static int ext4_fc_perform_commit(journal_t *journal)
				980	{
				981	struct super_block sb = (struct super_block )(journal->j_private);
				982	struct ext4_sb_info *sbi = EXT4_SB(sb);
				983	struct ext4_inode_info *iter;
				984	struct ext4_fc_head head;
				985	struct list_head *pos;
				986	struct inode *inode;
				987	struct blk_plug plug;
				988	int ret = 0;
				989	u32 crc = 0;
				990
				991	ret = ext4_fc_submit_inode_data_all(journal);
				992	if (ret)
				993	return ret;
				994
				995	ret = ext4_fc_wait_inode_data_all(journal);
				996	if (ret)
				997	return ret;
				998
				999	blk_start_plug(&plug);
				1000	if (sbi->s_fc_bytes == 0) {
				1001	/*
				1002	* Add a head tag only if this is the first fast commit
				1003	* in this TID.
				1004	*/
				1005	head.fc_features = cpu_to_le32(EXT4_FC_SUPPORTED_FEATURES);
				1006	head.fc_tid = cpu_to_le32(
				1007	sbi->s_journal->j_running_transaction->t_tid);
				1008	if (!ext4_fc_add_tlv(sb, EXT4_FC_TAG_HEAD, sizeof(head),
				1009	(u8 *)&head, &crc))
				1010	goto out;
				1011	}
				1012
				1013	spin_lock(&sbi->s_fc_lock);
				1014	ret = ext4_fc_commit_dentry_updates(journal, &crc);
				1015	if (ret) {
				1016	spin_unlock(&sbi->s_fc_lock);
				1017	goto out;
				1018	}
				1019
				1020	list_for_each(pos, &sbi->s_fc_q[FC_Q_MAIN]) {
				1021	iter = list_entry(pos, struct ext4_inode_info, i_fc_list);
				1022	inode = &iter->vfs_inode;
				1023	if (!ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING))
				1024	continue;
				1025
				1026	spin_unlock(&sbi->s_fc_lock);
				1027	ret = ext4_fc_write_inode_data(inode, &crc);
				1028	if (ret)
				1029	goto out;
				1030	ret = ext4_fc_write_inode(inode, &crc);
				1031	if (ret)
				1032	goto out;
				1033	spin_lock(&sbi->s_fc_lock);
				1034	EXT4_I(inode)->i_fc_committed_subtid =
				1035	atomic_read(&sbi->s_fc_subtid);
				1036	}
				1037	spin_unlock(&sbi->s_fc_lock);
				1038
				1039	ret = ext4_fc_write_tail(sb, crc);
				1040
				1041	out:
				1042	blk_finish_plug(&plug);
				1043	return ret;
				1044	}
				1045
				1046	/*
				1047	* The main commit entry point. Performs a fast commit for transaction
				1048	* commit_tid if needed. If it's not possible to perform a fast commit
				1049	* due to various reasons, we fall back to full commit. Returns 0
				1050	* on success, error otherwise.
				1051	*/
				1052	int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
				1053	{
				1054	struct super_block sb = (struct super_block )(journal->j_private);
				1055	struct ext4_sb_info *sbi = EXT4_SB(sb);
				1056	int nblks = 0, ret, bsize = journal->j_blocksize;
				1057	int subtid = atomic_read(&sbi->s_fc_subtid);
				1058	int reason = EXT4_FC_REASON_OK, fc_bufs_before = 0;
				1059	ktime_t start_time, commit_time;
				1060
				1061	trace_ext4_fc_commit_start(sb);
				1062
				1063	start_time = ktime_get();
				1064
				1065	if (!test_opt2(sb, JOURNAL_FAST_COMMIT) \|\|
				1066	(ext4_fc_is_ineligible(sb))) {
				1067	reason = EXT4_FC_REASON_INELIGIBLE;
				1068	goto out;
				1069	}
				1070
				1071	restart_fc:
				1072	ret = jbd2_fc_begin_commit(journal, commit_tid);
				1073	if (ret == -EALREADY) {
				1074	/* There was an ongoing commit, check if we need to restart */
				1075	if (atomic_read(&sbi->s_fc_subtid) <= subtid &&
				1076	commit_tid > journal->j_commit_sequence)
				1077	goto restart_fc;
				1078	reason = EXT4_FC_REASON_ALREADY_COMMITTED;
				1079	goto out;
				1080	} else if (ret) {
				1081	sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
				1082	reason = EXT4_FC_REASON_FC_START_FAILED;
				1083	goto out;
				1084	}
				1085
				1086	fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize;
				1087	ret = ext4_fc_perform_commit(journal);
				1088	if (ret < 0) {
				1089	sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
				1090	reason = EXT4_FC_REASON_FC_FAILED;
				1091	goto out;
				1092	}
				1093	nblks = (sbi->s_fc_bytes + bsize - 1) / bsize - fc_bufs_before;
				1094	ret = jbd2_fc_wait_bufs(journal, nblks);
				1095	if (ret < 0) {
				1096	sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
				1097	reason = EXT4_FC_REASON_FC_FAILED;
				1098	goto out;
				1099	}
				1100	atomic_inc(&sbi->s_fc_subtid);
				1101	jbd2_fc_end_commit(journal);
				1102	out:
				1103	/* Has any ineligible update happened since we started? */
				1104	if (reason == EXT4_FC_REASON_OK && ext4_fc_is_ineligible(sb)) {
				1105	sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
				1106	reason = EXT4_FC_REASON_INELIGIBLE;
				1107	}
				1108
				1109	spin_lock(&sbi->s_fc_lock);
				1110	if (reason != EXT4_FC_REASON_OK &&
				1111	reason != EXT4_FC_REASON_ALREADY_COMMITTED) {
				1112	sbi->s_fc_stats.fc_ineligible_commits++;
				1113	} else {
				1114	sbi->s_fc_stats.fc_num_commits++;
				1115	sbi->s_fc_stats.fc_numblks += nblks;
				1116	}
				1117	spin_unlock(&sbi->s_fc_lock);
				1118	nblks = (reason == EXT4_FC_REASON_OK) ? nblks : 0;
				1119	trace_ext4_fc_commit_stop(sb, nblks, reason);
				1120	commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
				1121	/*
				1122	* weight the commit time higher than the average time so we don't
				1123	* react too strongly to vast changes in the commit time
				1124	*/
				1125	if (likely(sbi->s_fc_avg_commit_time))
				1126	sbi->s_fc_avg_commit_time = (commit_time +
				1127	sbi->s_fc_avg_commit_time * 3) / 4;
				1128	else
				1129	sbi->s_fc_avg_commit_time = commit_time;
				1130	jbd_debug(1,
				1131	"Fast commit ended with blks = %d, reason = %d, subtid - %d",
				1132	nblks, reason, subtid);
				1133	if (reason == EXT4_FC_REASON_FC_FAILED)
				1134	return jbd2_fc_end_commit_fallback(journal, commit_tid);
				1135	if (reason == EXT4_FC_REASON_FC_START_FAILED \|\|
				1136	reason == EXT4_FC_REASON_INELIGIBLE)
				1137	return jbd2_complete_transaction(journal, commit_tid);
				1138	return 0;
				1139	}
				1140
Harshad Shirwadkar	ff780b9	2020-10-15 13:37:56 -0700	[diff] [blame]	1141	/*
				1142	* Fast commit cleanup routine. This is called after every fast commit and
				1143	* full commit. full is true if we are called after a full commit.
				1144	*/
				1145	static void ext4_fc_cleanup(journal_t *journal, int full)
				1146	{
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	1147	struct super_block *sb = journal->j_private;
				1148	struct ext4_sb_info *sbi = EXT4_SB(sb);
				1149	struct ext4_inode_info *iter;
				1150	struct ext4_fc_dentry_update *fc_dentry;
				1151	struct list_head pos, n;
				1152
				1153	if (full && sbi->s_fc_bh)
				1154	sbi->s_fc_bh = NULL;
				1155
				1156	jbd2_fc_release_bufs(journal);
				1157
				1158	spin_lock(&sbi->s_fc_lock);
				1159	list_for_each_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN]) {
				1160	iter = list_entry(pos, struct ext4_inode_info, i_fc_list);
				1161	list_del_init(&iter->i_fc_list);
				1162	ext4_clear_inode_state(&iter->vfs_inode,
				1163	EXT4_STATE_FC_COMMITTING);
				1164	ext4_fc_reset_inode(&iter->vfs_inode);
				1165	/* Make sure EXT4_STATE_FC_COMMITTING bit is clear */
				1166	smp_mb();
				1167	#if (BITS_PER_LONG < 64)
				1168	wake_up_bit(&iter->i_state_flags, EXT4_STATE_FC_COMMITTING);
				1169	#else
				1170	wake_up_bit(&iter->i_flags, EXT4_STATE_FC_COMMITTING);
				1171	#endif
				1172	}
				1173
				1174	while (!list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) {
				1175	fc_dentry = list_first_entry(&sbi->s_fc_dentry_q[FC_Q_MAIN],
				1176	struct ext4_fc_dentry_update,
				1177	fcd_list);
				1178	list_del_init(&fc_dentry->fcd_list);
				1179	spin_unlock(&sbi->s_fc_lock);
				1180
				1181	if (fc_dentry->fcd_name.name &&
				1182	fc_dentry->fcd_name.len > DNAME_INLINE_LEN)
				1183	kfree(fc_dentry->fcd_name.name);
				1184	kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry);
				1185	spin_lock(&sbi->s_fc_lock);
				1186	}
				1187
				1188	list_splice_init(&sbi->s_fc_dentry_q[FC_Q_STAGING],
				1189	&sbi->s_fc_dentry_q[FC_Q_MAIN]);
				1190	list_splice_init(&sbi->s_fc_q[FC_Q_STAGING],
				1191	&sbi->s_fc_q[FC_Q_STAGING]);
				1192
Harshad Shirwadkar	ababea7	2020-10-26 21:49:15 -0700	[diff] [blame]	1193	sbi->s_mount_flags &= ~EXT4_MF_FC_COMMITTING;
				1194	sbi->s_mount_flags &= ~EXT4_MF_FC_INELIGIBLE;
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	1195
				1196	if (full)
				1197	sbi->s_fc_bytes = 0;
				1198	spin_unlock(&sbi->s_fc_lock);
				1199	trace_ext4_fc_stats(sb);
Harshad Shirwadkar	ff780b9	2020-10-15 13:37:56 -0700	[diff] [blame]	1200	}
Harshad Shirwadkar	6866d7b	2020-10-15 13:37:55 -0700	[diff] [blame]	1201
Harshad Shirwadkar	8016e29	2020-10-15 13:37:59 -0700	[diff] [blame]	1202	/* Ext4 Replay Path Routines */
				1203
				1204	/* Get length of a particular tlv */
				1205	static inline int ext4_fc_tag_len(struct ext4_fc_tl *tl)
				1206	{
				1207	return le16_to_cpu(tl->fc_len);
				1208	}
				1209
				1210	/* Get a pointer to "value" of a tlv */
				1211	static inline u8 ext4_fc_tag_val(struct ext4_fc_tl tl)
				1212	{
				1213	return (u8 )tl + sizeof(tl);
				1214	}
				1215
				1216	/* Helper struct for dentry replay routines */
				1217	struct dentry_info_args {
				1218	int parent_ino, dname_len, ino, inode_len;
				1219	char *dname;
				1220	};
				1221
				1222	static inline void tl_to_darg(struct dentry_info_args *darg,
				1223	struct ext4_fc_tl *tl)
				1224	{
				1225	struct ext4_fc_dentry_info *fcd;
				1226
				1227	fcd = (struct ext4_fc_dentry_info *)ext4_fc_tag_val(tl);
				1228
				1229	darg->parent_ino = le32_to_cpu(fcd->fc_parent_ino);
				1230	darg->ino = le32_to_cpu(fcd->fc_ino);
				1231	darg->dname = fcd->fc_dname;
				1232	darg->dname_len = ext4_fc_tag_len(tl) -
				1233	sizeof(struct ext4_fc_dentry_info);
				1234	}
				1235
				1236	/* Unlink replay function */
				1237	static int ext4_fc_replay_unlink(struct super_block sb, struct ext4_fc_tl tl)
				1238	{
				1239	struct inode inode, old_parent;
				1240	struct qstr entry;
				1241	struct dentry_info_args darg;
				1242	int ret = 0;
				1243
				1244	tl_to_darg(&darg, tl);
				1245
				1246	trace_ext4_fc_replay(sb, EXT4_FC_TAG_UNLINK, darg.ino,
				1247	darg.parent_ino, darg.dname_len);
				1248
				1249	entry.name = darg.dname;
				1250	entry.len = darg.dname_len;
				1251	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
				1252
				1253	if (IS_ERR_OR_NULL(inode)) {
				1254	jbd_debug(1, "Inode %d not found", darg.ino);
				1255	return 0;
				1256	}
				1257
				1258	old_parent = ext4_iget(sb, darg.parent_ino,
				1259	EXT4_IGET_NORMAL);
				1260	if (IS_ERR_OR_NULL(old_parent)) {
				1261	jbd_debug(1, "Dir with inode %d not found", darg.parent_ino);
				1262	iput(inode);
				1263	return 0;
				1264	}
				1265
				1266	ret = __ext4_unlink(old_parent, &entry, inode);
				1267	/* -ENOENT ok coz it might not exist anymore. */
				1268	if (ret == -ENOENT)
				1269	ret = 0;
				1270	iput(old_parent);
				1271	iput(inode);
				1272	return ret;
				1273	}
				1274
				1275	static int ext4_fc_replay_link_internal(struct super_block *sb,
				1276	struct dentry_info_args *darg,
				1277	struct inode *inode)
				1278	{
				1279	struct inode *dir = NULL;
				1280	struct dentry dentry_dir = NULL, dentry_inode = NULL;
				1281	struct qstr qstr_dname = QSTR_INIT(darg->dname, darg->dname_len);
				1282	int ret = 0;
				1283
				1284	dir = ext4_iget(sb, darg->parent_ino, EXT4_IGET_NORMAL);
				1285	if (IS_ERR(dir)) {
				1286	jbd_debug(1, "Dir with inode %d not found.", darg->parent_ino);
				1287	dir = NULL;
				1288	goto out;
				1289	}
				1290
				1291	dentry_dir = d_obtain_alias(dir);
				1292	if (IS_ERR(dentry_dir)) {
				1293	jbd_debug(1, "Failed to obtain dentry");
				1294	dentry_dir = NULL;
				1295	goto out;
				1296	}
				1297
				1298	dentry_inode = d_alloc(dentry_dir, &qstr_dname);
				1299	if (!dentry_inode) {
				1300	jbd_debug(1, "Inode dentry not created.");
				1301	ret = -ENOMEM;
				1302	goto out;
				1303	}
				1304
				1305	ret = __ext4_link(dir, inode, dentry_inode);
				1306	/*
				1307	* It's possible that link already existed since data blocks
				1308	* for the dir in question got persisted before we crashed OR
				1309	* we replayed this tag and crashed before the entire replay
				1310	* could complete.
				1311	*/
				1312	if (ret && ret != -EEXIST) {
				1313	jbd_debug(1, "Failed to link\n");
				1314	goto out;
				1315	}
				1316
				1317	ret = 0;
				1318	out:
				1319	if (dentry_dir) {
				1320	d_drop(dentry_dir);
				1321	dput(dentry_dir);
				1322	} else if (dir) {
				1323	iput(dir);
				1324	}
				1325	if (dentry_inode) {
				1326	d_drop(dentry_inode);
				1327	dput(dentry_inode);
				1328	}
				1329
				1330	return ret;
				1331	}
				1332
				1333	/* Link replay function */
				1334	static int ext4_fc_replay_link(struct super_block sb, struct ext4_fc_tl tl)
				1335	{
				1336	struct inode *inode;
				1337	struct dentry_info_args darg;
				1338	int ret = 0;
				1339
				1340	tl_to_darg(&darg, tl);
				1341	trace_ext4_fc_replay(sb, EXT4_FC_TAG_LINK, darg.ino,
				1342	darg.parent_ino, darg.dname_len);
				1343
				1344	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
				1345	if (IS_ERR_OR_NULL(inode)) {
				1346	jbd_debug(1, "Inode not found.");
				1347	return 0;
				1348	}
				1349
				1350	ret = ext4_fc_replay_link_internal(sb, &darg, inode);
				1351	iput(inode);
				1352	return ret;
				1353	}
				1354
				1355	/*
				1356	* Record all the modified inodes during replay. We use this later to setup
				1357	* block bitmaps correctly.
				1358	*/
				1359	static int ext4_fc_record_modified_inode(struct super_block *sb, int ino)
				1360	{
				1361	struct ext4_fc_replay_state *state;
				1362	int i;
				1363
				1364	state = &EXT4_SB(sb)->s_fc_replay_state;
				1365	for (i = 0; i < state->fc_modified_inodes_used; i++)
				1366	if (state->fc_modified_inodes[i] == ino)
				1367	return 0;
				1368	if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) {
				1369	state->fc_modified_inodes_size +=
				1370	EXT4_FC_REPLAY_REALLOC_INCREMENT;
				1371	state->fc_modified_inodes = krealloc(
				1372	state->fc_modified_inodes, sizeof(int) *
				1373	state->fc_modified_inodes_size,
				1374	GFP_KERNEL);
				1375	if (!state->fc_modified_inodes)
				1376	return -ENOMEM;
				1377	}
				1378	state->fc_modified_inodes[state->fc_modified_inodes_used++] = ino;
				1379	return 0;
				1380	}
				1381
				1382	/*
				1383	* Inode replay function
				1384	*/
				1385	static int ext4_fc_replay_inode(struct super_block sb, struct ext4_fc_tl tl)
				1386	{
				1387	struct ext4_fc_inode *fc_inode;
				1388	struct ext4_inode *raw_inode;
				1389	struct ext4_inode *raw_fc_inode;
				1390	struct inode *inode = NULL;
				1391	struct ext4_iloc iloc;
				1392	int inode_len, ino, ret, tag = le16_to_cpu(tl->fc_tag);
				1393	struct ext4_extent_header *eh;
				1394
				1395	fc_inode = (struct ext4_fc_inode *)ext4_fc_tag_val(tl);
				1396
				1397	ino = le32_to_cpu(fc_inode->fc_ino);
				1398	trace_ext4_fc_replay(sb, tag, ino, 0, 0);
				1399
				1400	inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
				1401	if (!IS_ERR_OR_NULL(inode)) {
				1402	ext4_ext_clear_bb(inode);
				1403	iput(inode);
				1404	}
				1405
				1406	ext4_fc_record_modified_inode(sb, ino);
				1407
				1408	raw_fc_inode = (struct ext4_inode *)fc_inode->fc_raw_inode;
				1409	ret = ext4_get_fc_inode_loc(sb, ino, &iloc);
				1410	if (ret)
				1411	goto out;
				1412
				1413	inode_len = ext4_fc_tag_len(tl) - sizeof(struct ext4_fc_inode);
				1414	raw_inode = ext4_raw_inode(&iloc);
				1415
				1416	memcpy(raw_inode, raw_fc_inode, offsetof(struct ext4_inode, i_block));
				1417	memcpy(&raw_inode->i_generation, &raw_fc_inode->i_generation,
				1418	inode_len - offsetof(struct ext4_inode, i_generation));
				1419	if (le32_to_cpu(raw_inode->i_flags) & EXT4_EXTENTS_FL) {
				1420	eh = (struct ext4_extent_header *)(&raw_inode->i_block[0]);
				1421	if (eh->eh_magic != EXT4_EXT_MAGIC) {
				1422	memset(eh, 0, sizeof(*eh));
				1423	eh->eh_magic = EXT4_EXT_MAGIC;
				1424	eh->eh_max = cpu_to_le16(
				1425	(sizeof(raw_inode->i_block) -
				1426	sizeof(struct ext4_extent_header))
				1427	/ sizeof(struct ext4_extent));
				1428	}
				1429	} else if (le32_to_cpu(raw_inode->i_flags) & EXT4_INLINE_DATA_FL) {
				1430	memcpy(raw_inode->i_block, raw_fc_inode->i_block,
				1431	sizeof(raw_inode->i_block));
				1432	}
				1433
				1434	/* Immediately update the inode on disk. */
				1435	ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh);
				1436	if (ret)
				1437	goto out;
				1438	ret = sync_dirty_buffer(iloc.bh);
				1439	if (ret)
				1440	goto out;
				1441	ret = ext4_mark_inode_used(sb, ino);
				1442	if (ret)
				1443	goto out;
				1444
				1445	/* Given that we just wrote the inode on disk, this SHOULD succeed. */
				1446	inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
				1447	if (IS_ERR_OR_NULL(inode)) {
				1448	jbd_debug(1, "Inode not found.");
				1449	return -EFSCORRUPTED;
				1450	}
				1451
				1452	/*
				1453	* Our allocator could have made different decisions than before
				1454	* crashing. This should be fixed but until then, we calculate
				1455	* the number of blocks the inode.
				1456	*/
				1457	ext4_ext_replay_set_iblocks(inode);
				1458
				1459	inode->i_generation = le32_to_cpu(ext4_raw_inode(&iloc)->i_generation);
				1460	ext4_reset_inode_seed(inode);
				1461
				1462	ext4_inode_csum_set(inode, ext4_raw_inode(&iloc), EXT4_I(inode));
				1463	ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh);
				1464	sync_dirty_buffer(iloc.bh);
				1465	brelse(iloc.bh);
				1466	out:
				1467	iput(inode);
				1468	if (!ret)
				1469	blkdev_issue_flush(sb->s_bdev, GFP_KERNEL);
				1470
				1471	return 0;
				1472	}
				1473
				1474	/*
				1475	* Dentry create replay function.
				1476	*
				1477	* EXT4_FC_TAG_CREAT is preceded by EXT4_FC_TAG_INODE_FULL. Which means, the
				1478	* inode for which we are trying to create a dentry here, should already have
				1479	* been replayed before we start here.
				1480	*/
				1481	static int ext4_fc_replay_create(struct super_block sb, struct ext4_fc_tl tl)
				1482	{
				1483	int ret = 0;
				1484	struct inode *inode = NULL;
				1485	struct inode *dir = NULL;
				1486	struct dentry_info_args darg;
				1487
				1488	tl_to_darg(&darg, tl);
				1489
				1490	trace_ext4_fc_replay(sb, EXT4_FC_TAG_CREAT, darg.ino,
				1491	darg.parent_ino, darg.dname_len);
				1492
				1493	/* This takes care of update group descriptor and other metadata */
				1494	ret = ext4_mark_inode_used(sb, darg.ino);
				1495	if (ret)
				1496	goto out;
				1497
				1498	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
				1499	if (IS_ERR_OR_NULL(inode)) {
				1500	jbd_debug(1, "inode %d not found.", darg.ino);
				1501	inode = NULL;
				1502	ret = -EINVAL;
				1503	goto out;
				1504	}
				1505
				1506	if (S_ISDIR(inode->i_mode)) {
				1507	/*
				1508	* If we are creating a directory, we need to make sure that the
				1509	* dot and dot dot dirents are setup properly.
				1510	*/
				1511	dir = ext4_iget(sb, darg.parent_ino, EXT4_IGET_NORMAL);
				1512	if (IS_ERR_OR_NULL(dir)) {
				1513	jbd_debug(1, "Dir %d not found.", darg.ino);
				1514	goto out;
				1515	}
				1516	ret = ext4_init_new_dir(NULL, dir, inode);
				1517	iput(dir);
				1518	if (ret) {
				1519	ret = 0;
				1520	goto out;
				1521	}
				1522	}
				1523	ret = ext4_fc_replay_link_internal(sb, &darg, inode);
				1524	if (ret)
				1525	goto out;
				1526	set_nlink(inode, 1);
				1527	ext4_mark_inode_dirty(NULL, inode);
				1528	out:
				1529	if (inode)
				1530	iput(inode);
				1531	return ret;
				1532	}
				1533
				1534	/*
				1535	* Record physical disk regions which are in use as per fast commit area. Our
				1536	* simple replay phase allocator excludes these regions from allocation.
				1537	*/
				1538	static int ext4_fc_record_regions(struct super_block *sb, int ino,
				1539	ext4_lblk_t lblk, ext4_fsblk_t pblk, int len)
				1540	{
				1541	struct ext4_fc_replay_state *state;
				1542	struct ext4_fc_alloc_region *region;
				1543
				1544	state = &EXT4_SB(sb)->s_fc_replay_state;
				1545	if (state->fc_regions_used == state->fc_regions_size) {
				1546	state->fc_regions_size +=
				1547	EXT4_FC_REPLAY_REALLOC_INCREMENT;
				1548	state->fc_regions = krealloc(
				1549	state->fc_regions,
				1550	state->fc_regions_size *
				1551	sizeof(struct ext4_fc_alloc_region),
				1552	GFP_KERNEL);
				1553	if (!state->fc_regions)
				1554	return -ENOMEM;
				1555	}
				1556	region = &state->fc_regions[state->fc_regions_used++];
				1557	region->ino = ino;
				1558	region->lblk = lblk;
				1559	region->pblk = pblk;
				1560	region->len = len;
				1561
				1562	return 0;
				1563	}
				1564
				1565	/* Replay add range tag */
				1566	static int ext4_fc_replay_add_range(struct super_block *sb,
				1567	struct ext4_fc_tl *tl)
				1568	{
				1569	struct ext4_fc_add_range *fc_add_ex;
				1570	struct ext4_extent newex, *ex;
				1571	struct inode *inode;
				1572	ext4_lblk_t start, cur;
				1573	int remaining, len;
				1574	ext4_fsblk_t start_pblk;
				1575	struct ext4_map_blocks map;
				1576	struct ext4_ext_path *path = NULL;
				1577	int ret;
				1578
				1579	fc_add_ex = (struct ext4_fc_add_range *)ext4_fc_tag_val(tl);
				1580	ex = (struct ext4_extent *)&fc_add_ex->fc_ex;
				1581
				1582	trace_ext4_fc_replay(sb, EXT4_FC_TAG_ADD_RANGE,
				1583	le32_to_cpu(fc_add_ex->fc_ino), le32_to_cpu(ex->ee_block),
				1584	ext4_ext_get_actual_len(ex));
				1585
				1586	inode = ext4_iget(sb, le32_to_cpu(fc_add_ex->fc_ino),
				1587	EXT4_IGET_NORMAL);
				1588	if (IS_ERR_OR_NULL(inode)) {
				1589	jbd_debug(1, "Inode not found.");
				1590	return 0;
				1591	}
				1592
				1593	ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
				1594
				1595	start = le32_to_cpu(ex->ee_block);
				1596	start_pblk = ext4_ext_pblock(ex);
				1597	len = ext4_ext_get_actual_len(ex);
				1598
				1599	cur = start;
				1600	remaining = len;
				1601	jbd_debug(1, "ADD_RANGE, lblk %d, pblk %lld, len %d, unwritten %d, inode %ld\n",
				1602	start, start_pblk, len, ext4_ext_is_unwritten(ex),
				1603	inode->i_ino);
				1604
				1605	while (remaining > 0) {
				1606	map.m_lblk = cur;
				1607	map.m_len = remaining;
				1608	map.m_pblk = 0;
				1609	ret = ext4_map_blocks(NULL, inode, &map, 0);
				1610
				1611	if (ret < 0) {
				1612	iput(inode);
				1613	return 0;
				1614	}
				1615
				1616	if (ret == 0) {
				1617	/* Range is not mapped */
				1618	path = ext4_find_extent(inode, cur, NULL, 0);
				1619	if (!path)
				1620	continue;
				1621	memset(&newex, 0, sizeof(newex));
				1622	newex.ee_block = cpu_to_le32(cur);
				1623	ext4_ext_store_pblock(
				1624	&newex, start_pblk + cur - start);
				1625	newex.ee_len = cpu_to_le16(map.m_len);
				1626	if (ext4_ext_is_unwritten(ex))
				1627	ext4_ext_mark_unwritten(&newex);
				1628	down_write(&EXT4_I(inode)->i_data_sem);
				1629	ret = ext4_ext_insert_extent(
				1630	NULL, inode, &path, &newex, 0);
				1631	up_write((&EXT4_I(inode)->i_data_sem));
				1632	ext4_ext_drop_refs(path);
				1633	kfree(path);
				1634	if (ret) {
				1635	iput(inode);
				1636	return 0;
				1637	}
				1638	goto next;
				1639	}
				1640
				1641	if (start_pblk + cur - start != map.m_pblk) {
				1642	/*
				1643	* Logical to physical mapping changed. This can happen
				1644	* if this range was removed and then reallocated to
				1645	* map to new physical blocks during a fast commit.
				1646	*/
				1647	ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
				1648	ext4_ext_is_unwritten(ex),
				1649	start_pblk + cur - start);
				1650	if (ret) {
				1651	iput(inode);
				1652	return 0;
				1653	}
				1654	/*
				1655	* Mark the old blocks as free since they aren't used
				1656	* anymore. We maintain an array of all the modified
				1657	* inodes. In case these blocks are still used at either
				1658	* a different logical range in the same inode or in
				1659	* some different inode, we will mark them as allocated
				1660	* at the end of the FC replay using our array of
				1661	* modified inodes.
				1662	*/
				1663	ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
				1664	goto next;
				1665	}
				1666
				1667	/* Range is mapped and needs a state change */
				1668	jbd_debug(1, "Converting from %d to %d %lld",
				1669	map.m_flags & EXT4_MAP_UNWRITTEN,
				1670	ext4_ext_is_unwritten(ex), map.m_pblk);
				1671	ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
				1672	ext4_ext_is_unwritten(ex), map.m_pblk);
				1673	if (ret) {
				1674	iput(inode);
				1675	return 0;
				1676	}
				1677	/*
				1678	* We may have split the extent tree while toggling the state.
				1679	* Try to shrink the extent tree now.
				1680	*/
				1681	ext4_ext_replay_shrink_inode(inode, start + len);
				1682	next:
				1683	cur += map.m_len;
				1684	remaining -= map.m_len;
				1685	}
				1686	ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >>
				1687	sb->s_blocksize_bits);
				1688	iput(inode);
				1689	return 0;
				1690	}
				1691
				1692	/* Replay DEL_RANGE tag */
				1693	static int
				1694	ext4_fc_replay_del_range(struct super_block sb, struct ext4_fc_tl tl)
				1695	{
				1696	struct inode *inode;
				1697	struct ext4_fc_del_range *lrange;
				1698	struct ext4_map_blocks map;
				1699	ext4_lblk_t cur, remaining;
				1700	int ret;
				1701
				1702	lrange = (struct ext4_fc_del_range *)ext4_fc_tag_val(tl);
				1703	cur = le32_to_cpu(lrange->fc_lblk);
				1704	remaining = le32_to_cpu(lrange->fc_len);
				1705
				1706	trace_ext4_fc_replay(sb, EXT4_FC_TAG_DEL_RANGE,
				1707	le32_to_cpu(lrange->fc_ino), cur, remaining);
				1708
				1709	inode = ext4_iget(sb, le32_to_cpu(lrange->fc_ino), EXT4_IGET_NORMAL);
				1710	if (IS_ERR_OR_NULL(inode)) {
				1711	jbd_debug(1, "Inode %d not found", le32_to_cpu(lrange->fc_ino));
				1712	return 0;
				1713	}
				1714
				1715	ret = ext4_fc_record_modified_inode(sb, inode->i_ino);
				1716
				1717	jbd_debug(1, "DEL_RANGE, inode %ld, lblk %d, len %d\n",
				1718	inode->i_ino, le32_to_cpu(lrange->fc_lblk),
				1719	le32_to_cpu(lrange->fc_len));
				1720	while (remaining > 0) {
				1721	map.m_lblk = cur;
				1722	map.m_len = remaining;
				1723
				1724	ret = ext4_map_blocks(NULL, inode, &map, 0);
				1725	if (ret < 0) {
				1726	iput(inode);
				1727	return 0;
				1728	}
				1729	if (ret > 0) {
				1730	remaining -= ret;
				1731	cur += ret;
				1732	ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
				1733	} else {
				1734	remaining -= map.m_len;
				1735	cur += map.m_len;
				1736	}
				1737	}
				1738
				1739	ret = ext4_punch_hole(inode,
				1740	le32_to_cpu(lrange->fc_lblk) << sb->s_blocksize_bits,
				1741	le32_to_cpu(lrange->fc_len) << sb->s_blocksize_bits);
				1742	if (ret)
				1743	jbd_debug(1, "ext4_punch_hole returned %d", ret);
				1744	ext4_ext_replay_shrink_inode(inode,
				1745	i_size_read(inode) >> sb->s_blocksize_bits);
				1746	ext4_mark_inode_dirty(NULL, inode);
				1747	iput(inode);
				1748
				1749	return 0;
				1750	}
				1751
				1752	static inline const char *tag2str(u16 tag)
				1753	{
				1754	switch (tag) {
				1755	case EXT4_FC_TAG_LINK:
				1756	return "TAG_ADD_ENTRY";
				1757	case EXT4_FC_TAG_UNLINK:
				1758	return "TAG_DEL_ENTRY";
				1759	case EXT4_FC_TAG_ADD_RANGE:
				1760	return "TAG_ADD_RANGE";
				1761	case EXT4_FC_TAG_CREAT:
				1762	return "TAG_CREAT_DENTRY";
				1763	case EXT4_FC_TAG_DEL_RANGE:
				1764	return "TAG_DEL_RANGE";
				1765	case EXT4_FC_TAG_INODE:
				1766	return "TAG_INODE";
				1767	case EXT4_FC_TAG_PAD:
				1768	return "TAG_PAD";
				1769	case EXT4_FC_TAG_TAIL:
				1770	return "TAG_TAIL";
				1771	case EXT4_FC_TAG_HEAD:
				1772	return "TAG_HEAD";
				1773	default:
				1774	return "TAG_ERROR";
				1775	}
				1776	}
				1777
				1778	static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb)
				1779	{
				1780	struct ext4_fc_replay_state *state;
				1781	struct inode *inode;
				1782	struct ext4_ext_path *path = NULL;
				1783	struct ext4_map_blocks map;
				1784	int i, ret, j;
				1785	ext4_lblk_t cur, end;
				1786
				1787	state = &EXT4_SB(sb)->s_fc_replay_state;
				1788	for (i = 0; i < state->fc_modified_inodes_used; i++) {
				1789	inode = ext4_iget(sb, state->fc_modified_inodes[i],
				1790	EXT4_IGET_NORMAL);
				1791	if (IS_ERR_OR_NULL(inode)) {
				1792	jbd_debug(1, "Inode %d not found.",
				1793	state->fc_modified_inodes[i]);
				1794	continue;
				1795	}
				1796	cur = 0;
				1797	end = EXT_MAX_BLOCKS;
				1798	while (cur < end) {
				1799	map.m_lblk = cur;
				1800	map.m_len = end - cur;
				1801
				1802	ret = ext4_map_blocks(NULL, inode, &map, 0);
				1803	if (ret < 0)
				1804	break;
				1805
				1806	if (ret > 0) {
				1807	path = ext4_find_extent(inode, map.m_lblk, NULL, 0);
				1808	if (!IS_ERR_OR_NULL(path)) {
				1809	for (j = 0; j < path->p_depth; j++)
				1810	ext4_mb_mark_bb(inode->i_sb,
				1811	path[j].p_block, 1, 1);
				1812	ext4_ext_drop_refs(path);
				1813	kfree(path);
				1814	}
				1815	cur += ret;
				1816	ext4_mb_mark_bb(inode->i_sb, map.m_pblk,
				1817	map.m_len, 1);
				1818	} else {
				1819	cur = cur + (map.m_len ? map.m_len : 1);
				1820	}
				1821	}
				1822	iput(inode);
				1823	}
				1824	}
				1825
				1826	/*
				1827	* Check if block is in excluded regions for block allocation. The simple
				1828	* allocator that runs during replay phase is calls this function to see
				1829	* if it is okay to use a block.
				1830	*/
				1831	bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t blk)
				1832	{
				1833	int i;
				1834	struct ext4_fc_replay_state *state;
				1835
				1836	state = &EXT4_SB(sb)->s_fc_replay_state;
				1837	for (i = 0; i < state->fc_regions_valid; i++) {
				1838	if (state->fc_regions[i].ino == 0 \|\|
				1839	state->fc_regions[i].len == 0)
				1840	continue;
				1841	if (blk >= state->fc_regions[i].pblk &&
				1842	blk < state->fc_regions[i].pblk + state->fc_regions[i].len)
				1843	return true;
				1844	}
				1845	return false;
				1846	}
				1847
				1848	/* Cleanup function called after replay */
				1849	void ext4_fc_replay_cleanup(struct super_block *sb)
				1850	{
				1851	struct ext4_sb_info *sbi = EXT4_SB(sb);
				1852
				1853	sbi->s_mount_state &= ~EXT4_FC_REPLAY;
				1854	kfree(sbi->s_fc_replay_state.fc_regions);
				1855	kfree(sbi->s_fc_replay_state.fc_modified_inodes);
				1856	}
				1857
				1858	/*
				1859	* Recovery Scan phase handler
				1860	*
				1861	* This function is called during the scan phase and is responsible
				1862	* for doing following things:
				1863	* - Make sure the fast commit area has valid tags for replay
				1864	* - Count number of tags that need to be replayed by the replay handler
				1865	* - Verify CRC
				1866	* - Create a list of excluded blocks for allocation during replay phase
				1867	*
				1868	* This function returns JBD2_FC_REPLAY_CONTINUE to indicate that SCAN is
				1869	* incomplete and JBD2 should send more blocks. It returns JBD2_FC_REPLAY_STOP
				1870	* to indicate that scan has finished and JBD2 can now start replay phase.
				1871	* It returns a negative error to indicate that there was an error. At the end
				1872	* of a successful scan phase, sbi->s_fc_replay_state.fc_replay_num_tags is set
				1873	* to indicate the number of tags that need to replayed during the replay phase.
				1874	*/
				1875	static int ext4_fc_replay_scan(journal_t *journal,
				1876	struct buffer_head *bh, int off,
				1877	tid_t expected_tid)
				1878	{
				1879	struct super_block *sb = journal->j_private;
				1880	struct ext4_sb_info *sbi = EXT4_SB(sb);
				1881	struct ext4_fc_replay_state *state;
				1882	int ret = JBD2_FC_REPLAY_CONTINUE;
				1883	struct ext4_fc_add_range *ext;
				1884	struct ext4_fc_tl *tl;
				1885	struct ext4_fc_tail *tail;
				1886	__u8 start, end;
				1887	struct ext4_fc_head *head;
				1888	struct ext4_extent *ex;
				1889
				1890	state = &sbi->s_fc_replay_state;
				1891
				1892	start = (u8 *)bh->b_data;
				1893	end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
				1894
				1895	if (state->fc_replay_expected_off == 0) {
				1896	state->fc_cur_tag = 0;
				1897	state->fc_replay_num_tags = 0;
				1898	state->fc_crc = 0;
				1899	state->fc_regions = NULL;
				1900	state->fc_regions_valid = state->fc_regions_used =
				1901	state->fc_regions_size = 0;
				1902	/* Check if we can stop early */
				1903	if (le16_to_cpu(((struct ext4_fc_tl *)start)->fc_tag)
				1904	!= EXT4_FC_TAG_HEAD)
				1905	return 0;
				1906	}
				1907
				1908	if (off != state->fc_replay_expected_off) {
				1909	ret = -EFSCORRUPTED;
				1910	goto out_err;
				1911	}
				1912
				1913	state->fc_replay_expected_off++;
				1914	fc_for_each_tl(start, end, tl) {
				1915	jbd_debug(3, "Scan phase, tag:%s, blk %lld\n",
				1916	tag2str(le16_to_cpu(tl->fc_tag)), bh->b_blocknr);
				1917	switch (le16_to_cpu(tl->fc_tag)) {
				1918	case EXT4_FC_TAG_ADD_RANGE:
				1919	ext = (struct ext4_fc_add_range *)ext4_fc_tag_val(tl);
				1920	ex = (struct ext4_extent *)&ext->fc_ex;
				1921	ret = ext4_fc_record_regions(sb,
				1922	le32_to_cpu(ext->fc_ino),
				1923	le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex),
				1924	ext4_ext_get_actual_len(ex));
				1925	if (ret < 0)
				1926	break;
				1927	ret = JBD2_FC_REPLAY_CONTINUE;
				1928	fallthrough;
				1929	case EXT4_FC_TAG_DEL_RANGE:
				1930	case EXT4_FC_TAG_LINK:
				1931	case EXT4_FC_TAG_UNLINK:
				1932	case EXT4_FC_TAG_CREAT:
				1933	case EXT4_FC_TAG_INODE:
				1934	case EXT4_FC_TAG_PAD:
				1935	state->fc_cur_tag++;
				1936	state->fc_crc = ext4_chksum(sbi, state->fc_crc, tl,
				1937	sizeof(*tl) + ext4_fc_tag_len(tl));
				1938	break;
				1939	case EXT4_FC_TAG_TAIL:
				1940	state->fc_cur_tag++;
				1941	tail = (struct ext4_fc_tail *)ext4_fc_tag_val(tl);
				1942	state->fc_crc = ext4_chksum(sbi, state->fc_crc, tl,
				1943	sizeof(*tl) +
				1944	offsetof(struct ext4_fc_tail,
				1945	fc_crc));
				1946	if (le32_to_cpu(tail->fc_tid) == expected_tid &&
				1947	le32_to_cpu(tail->fc_crc) == state->fc_crc) {
				1948	state->fc_replay_num_tags = state->fc_cur_tag;
				1949	state->fc_regions_valid =
				1950	state->fc_regions_used;
				1951	} else {
				1952	ret = state->fc_replay_num_tags ?
				1953	JBD2_FC_REPLAY_STOP : -EFSBADCRC;
				1954	}
				1955	state->fc_crc = 0;
				1956	break;
				1957	case EXT4_FC_TAG_HEAD:
				1958	head = (struct ext4_fc_head *)ext4_fc_tag_val(tl);
				1959	if (le32_to_cpu(head->fc_features) &
				1960	~EXT4_FC_SUPPORTED_FEATURES) {
				1961	ret = -EOPNOTSUPP;
				1962	break;
				1963	}
				1964	if (le32_to_cpu(head->fc_tid) != expected_tid) {
				1965	ret = JBD2_FC_REPLAY_STOP;
				1966	break;
				1967	}
				1968	state->fc_cur_tag++;
				1969	state->fc_crc = ext4_chksum(sbi, state->fc_crc, tl,
				1970	sizeof(*tl) + ext4_fc_tag_len(tl));
				1971	break;
				1972	default:
				1973	ret = state->fc_replay_num_tags ?
				1974	JBD2_FC_REPLAY_STOP : -ECANCELED;
				1975	}
				1976	if (ret < 0 \|\| ret == JBD2_FC_REPLAY_STOP)
				1977	break;
				1978	}
				1979
				1980	out_err:
				1981	trace_ext4_fc_replay_scan(sb, ret, off);
				1982	return ret;
				1983	}
				1984
Harshad Shirwadkar	5b849b5	2020-10-15 13:37:58 -0700	[diff] [blame]	1985	/*
				1986	* Main recovery path entry point.
Harshad Shirwadkar	8016e29	2020-10-15 13:37:59 -0700	[diff] [blame]	1987	* The meaning of return codes is similar as above.
Harshad Shirwadkar	5b849b5	2020-10-15 13:37:58 -0700	[diff] [blame]	1988	*/
				1989	static int ext4_fc_replay(journal_t journal, struct buffer_head bh,
				1990	enum passtype pass, int off, tid_t expected_tid)
				1991	{
Harshad Shirwadkar	8016e29	2020-10-15 13:37:59 -0700	[diff] [blame]	1992	struct super_block *sb = journal->j_private;
				1993	struct ext4_sb_info *sbi = EXT4_SB(sb);
				1994	struct ext4_fc_tl *tl;
				1995	__u8 start, end;
				1996	int ret = JBD2_FC_REPLAY_CONTINUE;
				1997	struct ext4_fc_replay_state *state = &sbi->s_fc_replay_state;
				1998	struct ext4_fc_tail *tail;
				1999
				2000	if (pass == PASS_SCAN) {
				2001	state->fc_current_pass = PASS_SCAN;
				2002	return ext4_fc_replay_scan(journal, bh, off, expected_tid);
				2003	}
				2004
				2005	if (state->fc_current_pass != pass) {
				2006	state->fc_current_pass = pass;
				2007	sbi->s_mount_state \|= EXT4_FC_REPLAY;
				2008	}
				2009	if (!sbi->s_fc_replay_state.fc_replay_num_tags) {
				2010	jbd_debug(1, "Replay stops\n");
				2011	ext4_fc_set_bitmaps_and_counters(sb);
				2012	return 0;
				2013	}
				2014
				2015	#ifdef CONFIG_EXT4_DEBUG
				2016	if (sbi->s_fc_debug_max_replay && off >= sbi->s_fc_debug_max_replay) {
				2017	pr_warn("Dropping fc block %d because max_replay set\n", off);
				2018	return JBD2_FC_REPLAY_STOP;
				2019	}
				2020	#endif
				2021
				2022	start = (u8 *)bh->b_data;
				2023	end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
				2024
				2025	fc_for_each_tl(start, end, tl) {
				2026	if (state->fc_replay_num_tags == 0) {
				2027	ret = JBD2_FC_REPLAY_STOP;
				2028	ext4_fc_set_bitmaps_and_counters(sb);
				2029	break;
				2030	}
				2031	jbd_debug(3, "Replay phase, tag:%s\n",
				2032	tag2str(le16_to_cpu(tl->fc_tag)));
				2033	state->fc_replay_num_tags--;
				2034	switch (le16_to_cpu(tl->fc_tag)) {
				2035	case EXT4_FC_TAG_LINK:
				2036	ret = ext4_fc_replay_link(sb, tl);
				2037	break;
				2038	case EXT4_FC_TAG_UNLINK:
				2039	ret = ext4_fc_replay_unlink(sb, tl);
				2040	break;
				2041	case EXT4_FC_TAG_ADD_RANGE:
				2042	ret = ext4_fc_replay_add_range(sb, tl);
				2043	break;
				2044	case EXT4_FC_TAG_CREAT:
				2045	ret = ext4_fc_replay_create(sb, tl);
				2046	break;
				2047	case EXT4_FC_TAG_DEL_RANGE:
				2048	ret = ext4_fc_replay_del_range(sb, tl);
				2049	break;
				2050	case EXT4_FC_TAG_INODE:
				2051	ret = ext4_fc_replay_inode(sb, tl);
				2052	break;
				2053	case EXT4_FC_TAG_PAD:
				2054	trace_ext4_fc_replay(sb, EXT4_FC_TAG_PAD, 0,
				2055	ext4_fc_tag_len(tl), 0);
				2056	break;
				2057	case EXT4_FC_TAG_TAIL:
				2058	trace_ext4_fc_replay(sb, EXT4_FC_TAG_TAIL, 0,
				2059	ext4_fc_tag_len(tl), 0);
				2060	tail = (struct ext4_fc_tail *)ext4_fc_tag_val(tl);
				2061	WARN_ON(le32_to_cpu(tail->fc_tid) != expected_tid);
				2062	break;
				2063	case EXT4_FC_TAG_HEAD:
				2064	break;
				2065	default:
				2066	trace_ext4_fc_replay(sb, le16_to_cpu(tl->fc_tag), 0,
				2067	ext4_fc_tag_len(tl), 0);
				2068	ret = -ECANCELED;
				2069	break;
				2070	}
				2071	if (ret < 0)
				2072	break;
				2073	ret = JBD2_FC_REPLAY_CONTINUE;
				2074	}
				2075	return ret;
Harshad Shirwadkar	5b849b5	2020-10-15 13:37:58 -0700	[diff] [blame]	2076	}
				2077
Harshad Shirwadkar	6866d7b	2020-10-15 13:37:55 -0700	[diff] [blame]	2078	void ext4_fc_init(struct super_block sb, journal_t journal)
				2079	{
Harshad Shirwadkar	e029c5f	2020-10-26 21:49:14 -0700	[diff] [blame]	2080	int num_fc_blocks;
				2081
Harshad Shirwadkar	5b849b5	2020-10-15 13:37:58 -0700	[diff] [blame]	2082	/*
				2083	* We set replay callback even if fast commit disabled because we may
				2084	* could still have fast commit blocks that need to be replayed even if
				2085	* fast commit has now been turned off.
				2086	*/
				2087	journal->j_fc_replay_callback = ext4_fc_replay;
Harshad Shirwadkar	6866d7b	2020-10-15 13:37:55 -0700	[diff] [blame]	2088	if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
				2089	return;
Harshad Shirwadkar	ff780b9	2020-10-15 13:37:56 -0700	[diff] [blame]	2090	journal->j_fc_cleanup_callback = ext4_fc_cleanup;
Harshad Shirwadkar	e029c5f	2020-10-26 21:49:14 -0700	[diff] [blame]	2091	if (!buffer_uptodate(journal->j_sb_buffer)
				2092	&& ext4_read_bh_lock(journal->j_sb_buffer, REQ_META \| REQ_PRIO,
				2093	true)) {
				2094	ext4_msg(sb, KERN_ERR, "I/O error on journal");
				2095	return;
				2096	}
				2097	num_fc_blocks = be32_to_cpu(journal->j_superblock->s_num_fc_blks);
				2098	if (jbd2_fc_init(journal, num_fc_blocks ? num_fc_blocks :
				2099	EXT4_NUM_FC_BLKS)) {
Harshad Shirwadkar	6866d7b	2020-10-15 13:37:55 -0700	[diff] [blame]	2100	pr_warn("Error while enabling fast commits, turning off.");
				2101	ext4_clear_feature_fast_commit(sb);
				2102	}
				2103	}
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	2104
/*
 * Human readable descriptions of why a fast commit could not be used.
 * ext4_fc_info_show() prints entry i next to fc_ineligible_reason_count[i]
 * for i below EXT4_FC_REASON_MAX, so the order here must match the order of
 * the reason codes used to index that counter array.
 */
const char *fc_ineligible_reasons[] = {
	"Extended attributes changed",
	"Cross rename",
	"Journal flag changed",
	"Insufficient memory",
	"Swap boot",
	"Resize",
	"Dir renamed",
	"Falloc range op",
	"FC Commit Failed"
};
				2116
				2117	int ext4_fc_info_show(struct seq_file seq, void v)
				2118	{
				2119	struct ext4_sb_info sbi = EXT4_SB((struct super_block )seq->private);
				2120	struct ext4_fc_stats *stats = &sbi->s_fc_stats;
				2121	int i;
				2122
				2123	if (v != SEQ_START_TOKEN)
				2124	return 0;
				2125
				2126	seq_printf(seq,
				2127	"fc stats:\n%ld commits\n%ld ineligible\n%ld numblks\n%lluus avg_commit_time\n",
				2128	stats->fc_num_commits, stats->fc_ineligible_commits,
				2129	stats->fc_numblks,
				2130	div_u64(sbi->s_fc_avg_commit_time, 1000));
				2131	seq_puts(seq, "Ineligible reasons:\n");
				2132	for (i = 0; i < EXT4_FC_REASON_MAX; i++)
				2133	seq_printf(seq, "\"%s\":\t%d\n", fc_ineligible_reasons[i],
				2134	stats->fc_ineligible_reason_count[i]);
				2135
				2136	return 0;
				2137	}
				2138
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	2139	int __init ext4_fc_init_dentry_cache(void)
				2140	{
				2141	ext4_fc_dentry_cachep = KMEM_CACHE(ext4_fc_dentry_update,
				2142	SLAB_RECLAIM_ACCOUNT);
				2143
				2144	if (ext4_fc_dentry_cachep == NULL)
				2145	return -ENOMEM;
				2146
				2147	return 0;
				2148	}