blob: e69c580fa91e3fd5df5af753478afbf2592decd5 [file] [log] [blame]
Harshad Shirwadkar6866d7b2020-10-15 13:37:55 -07001// SPDX-License-Identifier: GPL-2.0
2
3/*
4 * fs/ext4/fast_commit.c
5 *
6 * Written by Harshad Shirwadkar <harshadshirwadkar@gmail.com>
7 *
8 * Ext4 fast commits routines.
9 */
Harshad Shirwadkaraa75f4d2020-10-15 13:37:57 -070010#include "ext4.h"
Harshad Shirwadkar6866d7b2020-10-15 13:37:55 -070011#include "ext4_jbd2.h"
Harshad Shirwadkaraa75f4d2020-10-15 13:37:57 -070012#include "ext4_extents.h"
13#include "mballoc.h"
14
15/*
16 * Ext4 Fast Commits
17 * -----------------
18 *
19 * Ext4 fast commits implement fine grained journalling for Ext4.
20 *
21 * Fast commits are organized as a log of tag-length-value (TLV) structs. (See
22 * struct ext4_fc_tl). Each TLV contains some delta that is replayed TLV by
23 * TLV during the recovery phase. For the scenarios for which we currently
24 * don't have replay code, fast commit falls back to full commits.
25 * Fast commits record delta in one of the following three categories.
26 *
27 * (A) Directory entry updates:
28 *
29 * - EXT4_FC_TAG_UNLINK - records directory entry unlink
30 * - EXT4_FC_TAG_LINK - records directory entry link
31 * - EXT4_FC_TAG_CREAT - records inode and directory entry creation
32 *
33 * (B) File specific data range updates:
34 *
35 * - EXT4_FC_TAG_ADD_RANGE - records addition of new blocks to an inode
36 * - EXT4_FC_TAG_DEL_RANGE - records deletion of blocks from an inode
37 *
38 * (C) Inode metadata (mtime / ctime etc):
39 *
40 * - EXT4_FC_TAG_INODE - record the inode that should be replayed
41 * during recovery. Note that iblocks field is
42 * not replayed and instead derived during
43 * replay.
44 * Commit Operation
45 * ----------------
46 * With fast commits, we maintain all the directory entry operations in the
47 * order in which they are issued in an in-memory queue. This queue is flushed
48 * to disk during the commit operation. We also maintain a list of inodes
49 * that need to be committed during a fast commit in another in memory queue of
50 * inodes. During the commit operation, we commit in the following order:
51 *
52 * [1] Lock inodes for any further data updates by setting COMMITTING state
53 * [2] Submit data buffers of all the inodes
54 * [3] Wait for [2] to complete
55 * [4] Commit all the directory entry updates in the fast commit space
56 * [5] Commit all the changed inode structures
57 * [6] Write tail tag (this tag ensures the atomicity, please read the following
58 * section for more details).
59 * [7] Wait for [4], [5] and [6] to complete.
60 *
61 * All the inode updates must call ext4_fc_start_update() before starting an
62 * update. If such an ongoing update is present, fast commit waits for it to
63 * complete. The completion of such an update is marked by
64 * ext4_fc_stop_update().
65 *
66 * Fast Commit Ineligibility
67 * -------------------------
68 * Not all operations are supported by fast commits today (e.g extended
 * attributes). Fast commit ineligibility is marked by calling one of the
70 * two following functions:
71 *
72 * - ext4_fc_mark_ineligible(): This makes next fast commit operation to fall
73 * back to full commit. This is useful in case of transient errors.
74 *
75 * - ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() - This makes all
76 * the fast commits happening between ext4_fc_start_ineligible() and
77 * ext4_fc_stop_ineligible() and one fast commit after the call to
78 * ext4_fc_stop_ineligible() to fall back to full commits. It is important to
79 * make one more fast commit to fall back to full commit after stop call so
 * that it is guaranteed that the fast commit ineligible operation contained
81 * within ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() is
82 * followed by at least 1 full commit.
83 *
84 * Atomicity of commits
85 * --------------------
 * In order to guarantee atomicity during the commit operation, fast commit
87 * uses "EXT4_FC_TAG_TAIL" tag that marks a fast commit as complete. Tail
88 * tag contains CRC of the contents and TID of the transaction after which
89 * this fast commit should be applied. Recovery code replays fast commit
90 * logs only if there's at least 1 valid tail present. For every fast commit
91 * operation, there is 1 tail. This means, we may end up with multiple tails
92 * in the fast commit space. Here's an example:
93 *
94 * - Create a new file A and remove existing file B
95 * - fsync()
96 * - Append contents to file A
97 * - Truncate file A
98 * - fsync()
99 *
100 * The fast commit space at the end of above operations would look like this:
101 * [HEAD] [CREAT A] [UNLINK B] [TAIL] [ADD_RANGE A] [DEL_RANGE A] [TAIL]
102 * |<--- Fast Commit 1 --->|<--- Fast Commit 2 ---->|
103 *
104 * Replay code should thus check for all the valid tails in the FC area.
105 *
106 * TODOs
107 * -----
108 * 1) Make fast commit atomic updates more fine grained. Today, a fast commit
109 * eligible update must be protected within ext4_fc_start_update() and
110 * ext4_fc_stop_update(). These routines are called at much higher
111 * routines. This can be made more fine grained by combining with
112 * ext4_journal_start().
113 *
114 * 2) Same above for ext4_fc_start_ineligible() and ext4_fc_stop_ineligible()
115 *
116 * 3) Handle more ineligible cases.
117 */
118
119#include <trace/events/ext4.h>
120static struct kmem_cache *ext4_fc_dentry_cachep;
121
122static void ext4_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
123{
124 BUFFER_TRACE(bh, "");
125 if (uptodate) {
126 ext4_debug("%s: Block %lld up-to-date",
127 __func__, bh->b_blocknr);
128 set_buffer_uptodate(bh);
129 } else {
130 ext4_debug("%s: Block %lld not up-to-date",
131 __func__, bh->b_blocknr);
132 clear_buffer_uptodate(bh);
133 }
134
135 unlock_buffer(bh);
136}
137
138static inline void ext4_fc_reset_inode(struct inode *inode)
139{
140 struct ext4_inode_info *ei = EXT4_I(inode);
141
142 ei->i_fc_lblk_start = 0;
143 ei->i_fc_lblk_len = 0;
144}
145
146void ext4_fc_init_inode(struct inode *inode)
147{
148 struct ext4_inode_info *ei = EXT4_I(inode);
149
150 ext4_fc_reset_inode(inode);
151 ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING);
152 INIT_LIST_HEAD(&ei->i_fc_list);
153 init_waitqueue_head(&ei->i_fc_wait);
154 atomic_set(&ei->i_fc_updates, 0);
155 ei->i_fc_committed_subtid = 0;
156}
157
158/*
 * Inform Ext4's fast commit subsystem about the start of an inode update
160 *
161 * This function is called by the high level call VFS callbacks before
162 * performing any inode update. This function blocks if there's an ongoing
163 * fast commit on the inode in question.
164 */
void ext4_fc_start_update(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);

	/* Nothing to do if fast commits are disabled or we are replaying. */
	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
	    (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
		return;

restart:
	spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
	/* Inode not queued for fast commit: no commit can race with us. */
	if (list_empty(&ei->i_fc_list))
		goto out;

	if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
		wait_queue_head_t *wq;
		/*
		 * The EXT4_STATE_FC_COMMITTING bit lives in i_state_flags on
		 * builds where BITS_PER_LONG < 64 and in i_flags otherwise;
		 * wait on whichever word actually holds the bit.
		 */
#if (BITS_PER_LONG < 64)
		DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
				EXT4_STATE_FC_COMMITTING);
		wq = bit_waitqueue(&ei->i_state_flags,
				   EXT4_STATE_FC_COMMITTING);
#else
		DEFINE_WAIT_BIT(wait, &ei->i_flags,
				EXT4_STATE_FC_COMMITTING);
		wq = bit_waitqueue(&ei->i_flags,
				   EXT4_STATE_FC_COMMITTING);
#endif
		/* Drop the spinlock before sleeping; retest after wakeup. */
		prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
		spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
		schedule();
		finish_wait(wq, &wait.wq_entry);
		goto restart;
	}
out:
	/* Count this update; ext4_fc_stop_update() decrements it. */
	atomic_inc(&ei->i_fc_updates);
	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
}
201
202/*
203 * Stop inode update and wake up waiting fast commits if any.
204 */
205void ext4_fc_stop_update(struct inode *inode)
206{
207 struct ext4_inode_info *ei = EXT4_I(inode);
208
Harshad Shirwadkar8016e292020-10-15 13:37:59 -0700209 if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
210 (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
Harshad Shirwadkaraa75f4d2020-10-15 13:37:57 -0700211 return;
212
213 if (atomic_dec_and_test(&ei->i_fc_updates))
214 wake_up_all(&ei->i_fc_wait);
215}
216
/*
 * Remove inode from fast commit list. If the inode is being committed
 * we wait until inode commit is done.
 */
void ext4_fc_del(struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);

	/* Nothing to do if fast commits are disabled or we are replaying. */
	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
	    (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY))
		return;

restart:
	spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
	if (list_empty(&ei->i_fc_list)) {
		spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
		return;
	}

	if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
		wait_queue_head_t *wq;
		/*
		 * The COMMITTING bit lives in i_state_flags on builds with
		 * BITS_PER_LONG < 64 and in i_flags otherwise (same scheme
		 * as ext4_fc_start_update()).
		 */
#if (BITS_PER_LONG < 64)
		DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
				EXT4_STATE_FC_COMMITTING);
		wq = bit_waitqueue(&ei->i_state_flags,
				   EXT4_STATE_FC_COMMITTING);
#else
		DEFINE_WAIT_BIT(wait, &ei->i_flags,
				EXT4_STATE_FC_COMMITTING);
		wq = bit_waitqueue(&ei->i_flags,
				   EXT4_STATE_FC_COMMITTING);
#endif
		/* Drop the lock while sleeping; the commit path needs it. */
		prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
		spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
		schedule();
		finish_wait(wq, &wait.wq_entry);
		goto restart;
	}
	/* list_empty() was checked above under this lock; this is defensive. */
	if (!list_empty(&ei->i_fc_list))
		list_del_init(&ei->i_fc_list);
	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
}
259
260/*
261 * Mark file system as fast commit ineligible. This means that next commit
262 * operation would result in a full jbd2 commit.
263 */
264void ext4_fc_mark_ineligible(struct super_block *sb, int reason)
265{
266 struct ext4_sb_info *sbi = EXT4_SB(sb);
267
Harshad Shirwadkar8016e292020-10-15 13:37:59 -0700268 if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
269 (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
270 return;
271
Harshad Shirwadkarababea72020-10-26 21:49:15 -0700272 sbi->s_mount_flags |= EXT4_MF_FC_INELIGIBLE;
Harshad Shirwadkaraa75f4d2020-10-15 13:37:57 -0700273 WARN_ON(reason >= EXT4_FC_REASON_MAX);
274 sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
275}
276
277/*
278 * Start a fast commit ineligible update. Any commits that happen while
279 * such an operation is in progress fall back to full commits.
280 */
281void ext4_fc_start_ineligible(struct super_block *sb, int reason)
282{
283 struct ext4_sb_info *sbi = EXT4_SB(sb);
284
Harshad Shirwadkar8016e292020-10-15 13:37:59 -0700285 if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
286 (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
287 return;
288
Harshad Shirwadkaraa75f4d2020-10-15 13:37:57 -0700289 WARN_ON(reason >= EXT4_FC_REASON_MAX);
290 sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
291 atomic_inc(&sbi->s_fc_ineligible_updates);
292}
293
294/*
Harshad Shirwadkarababea72020-10-26 21:49:15 -0700295 * Stop a fast commit ineligible update. We set EXT4_MF_FC_INELIGIBLE flag here
Harshad Shirwadkaraa75f4d2020-10-15 13:37:57 -0700296 * to ensure that after stopping the ineligible update, at least one full
297 * commit takes place.
298 */
299void ext4_fc_stop_ineligible(struct super_block *sb)
300{
Harshad Shirwadkar8016e292020-10-15 13:37:59 -0700301 if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
302 (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
303 return;
304
Harshad Shirwadkarababea72020-10-26 21:49:15 -0700305 EXT4_SB(sb)->s_mount_flags |= EXT4_MF_FC_INELIGIBLE;
Harshad Shirwadkaraa75f4d2020-10-15 13:37:57 -0700306 atomic_dec(&EXT4_SB(sb)->s_fc_ineligible_updates);
307}
308
309static inline int ext4_fc_is_ineligible(struct super_block *sb)
310{
Harshad Shirwadkarababea72020-10-26 21:49:15 -0700311 return (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FC_INELIGIBLE) ||
Harshad Shirwadkaraa75f4d2020-10-15 13:37:57 -0700312 atomic_read(&EXT4_SB(sb)->s_fc_ineligible_updates);
313}
314
315/*
316 * Generic fast commit tracking function. If this is the first time this we are
317 * called after a full commit, we initialize fast commit fields and then call
318 * __fc_track_fn() with update = 0. If we have already been called after a full
319 * commit, we pass update = 1. Based on that, the track function can determine
320 * if it needs to track a field for the first time or if it needs to just
321 * update the previously tracked value.
322 *
323 * If enqueue is set, this function enqueues the inode in fast commit list.
324 */
static int ext4_fc_track_template(
	handle_t *handle, struct inode *inode,
	int (*__fc_track_fn)(struct inode *, void *, bool),
	void *args, int enqueue)
{
	bool update = false;
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	tid_t tid = 0;
	int ret;

	/* Fast commits disabled or replay in progress: nothing to track. */
	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT) ||
	    (sbi->s_mount_state & EXT4_FC_REPLAY))
		return -EOPNOTSUPP;

	if (ext4_fc_is_ineligible(inode->i_sb))
		return -EINVAL;

	/*
	 * If this handle belongs to the same transaction the inode was last
	 * tracked under, the track function only updates previously tracked
	 * state (update == true); otherwise tracking starts afresh.
	 */
	tid = handle->h_transaction->t_tid;
	mutex_lock(&ei->i_fc_lock);
	if (tid == ei->i_sync_tid) {
		update = true;
	} else {
		ext4_fc_reset_inode(inode);
		ei->i_sync_tid = tid;
	}
	/* __fc_track_fn runs under i_fc_lock (it may drop and re-take it). */
	ret = __fc_track_fn(inode, args, update);
	mutex_unlock(&ei->i_fc_lock);

	if (!enqueue)
		return ret;

	/*
	 * Queue the inode for fast commit; while a commit is running, new
	 * inodes go on the staging queue instead of the main queue.
	 */
	spin_lock(&sbi->s_fc_lock);
	if (list_empty(&EXT4_I(inode)->i_fc_list))
		list_add_tail(&EXT4_I(inode)->i_fc_list,
				(sbi->s_mount_flags & EXT4_MF_FC_COMMITTING) ?
				&sbi->s_fc_q[FC_Q_STAGING] :
				&sbi->s_fc_q[FC_Q_MAIN]);
	spin_unlock(&sbi->s_fc_lock);

	return ret;
}
367
/* Arguments passed to __track_dentry_update() via ext4_fc_track_template(). */
struct __track_dentry_update_args {
	struct dentry *dentry;	/* dentry being created/linked/unlinked */
	int op;			/* EXT4_FC_TAG_{CREAT,LINK,UNLINK} */
};
372
/* __track_fn for directory entry updates. Called with ei->i_fc_lock. */
static int __track_dentry_update(struct inode *inode, void *arg, bool update)
{
	struct ext4_fc_dentry_update *node;
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct __track_dentry_update_args *dentry_update =
		(struct __track_dentry_update_args *)arg;
	struct dentry *dentry = dentry_update->dentry;
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);

	/* Drop i_fc_lock around the allocation; GFP_NOFS may sleep. */
	mutex_unlock(&ei->i_fc_lock);
	node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
	if (!node) {
		/* Can't record this update: force the next commit to be full. */
		ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
		/* Caller expects i_fc_lock held on return. */
		mutex_lock(&ei->i_fc_lock);
		return -ENOMEM;
	}

	node->fcd_op = dentry_update->op;
	node->fcd_parent = dentry->d_parent->d_inode->i_ino;
	node->fcd_ino = inode->i_ino;
	/* Short names fit in the embedded buffer; longer ones are kmalloc'ed. */
	if (dentry->d_name.len > DNAME_INLINE_LEN) {
		node->fcd_name.name = kmalloc(dentry->d_name.len, GFP_NOFS);
		if (!node->fcd_name.name) {
			kmem_cache_free(ext4_fc_dentry_cachep, node);
			ext4_fc_mark_ineligible(inode->i_sb,
				EXT4_FC_REASON_NOMEM);
			mutex_lock(&ei->i_fc_lock);
			return -ENOMEM;
		}
		/* Not NUL-terminated; fcd_name.len carries the length. */
		memcpy((u8 *)node->fcd_name.name, dentry->d_name.name,
			dentry->d_name.len);
	} else {
		memcpy(node->fcd_iname, dentry->d_name.name,
			dentry->d_name.len);
		node->fcd_name.name = node->fcd_iname;
	}
	node->fcd_name.len = dentry->d_name.len;

	/* Dentry updates go to the staging queue while a commit is running. */
	spin_lock(&sbi->s_fc_lock);
	if (sbi->s_mount_flags & EXT4_MF_FC_COMMITTING)
		list_add_tail(&node->fcd_list,
				&sbi->s_fc_dentry_q[FC_Q_STAGING]);
	else
		list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_MAIN]);
	spin_unlock(&sbi->s_fc_lock);
	/* Re-take i_fc_lock for the caller (ext4_fc_track_template). */
	mutex_lock(&ei->i_fc_lock);

	return 0;
}
423
Harshad Shirwadkara80f7fc2020-11-05 19:58:53 -0800424void __ext4_fc_track_unlink(handle_t *handle,
425 struct inode *inode, struct dentry *dentry)
Harshad Shirwadkaraa75f4d2020-10-15 13:37:57 -0700426{
427 struct __track_dentry_update_args args;
428 int ret;
429
430 args.dentry = dentry;
431 args.op = EXT4_FC_TAG_UNLINK;
432
Harshad Shirwadkara80f7fc2020-11-05 19:58:53 -0800433 ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
Harshad Shirwadkaraa75f4d2020-10-15 13:37:57 -0700434 (void *)&args, 0);
435 trace_ext4_fc_track_unlink(inode, dentry, ret);
436}
437
Harshad Shirwadkara80f7fc2020-11-05 19:58:53 -0800438void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry)
439{
440 __ext4_fc_track_unlink(handle, d_inode(dentry), dentry);
441}
442
443void __ext4_fc_track_link(handle_t *handle,
444 struct inode *inode, struct dentry *dentry)
Harshad Shirwadkaraa75f4d2020-10-15 13:37:57 -0700445{
446 struct __track_dentry_update_args args;
447 int ret;
448
449 args.dentry = dentry;
450 args.op = EXT4_FC_TAG_LINK;
451
Harshad Shirwadkara80f7fc2020-11-05 19:58:53 -0800452 ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
Harshad Shirwadkaraa75f4d2020-10-15 13:37:57 -0700453 (void *)&args, 0);
454 trace_ext4_fc_track_link(inode, dentry, ret);
455}
456
Harshad Shirwadkara80f7fc2020-11-05 19:58:53 -0800457void ext4_fc_track_link(handle_t *handle, struct dentry *dentry)
458{
459 __ext4_fc_track_link(handle, d_inode(dentry), dentry);
460}
461
462void ext4_fc_track_create(handle_t *handle, struct dentry *dentry)
Harshad Shirwadkaraa75f4d2020-10-15 13:37:57 -0700463{
464 struct __track_dentry_update_args args;
Harshad Shirwadkara80f7fc2020-11-05 19:58:53 -0800465 struct inode *inode = d_inode(dentry);
Harshad Shirwadkaraa75f4d2020-10-15 13:37:57 -0700466 int ret;
467
468 args.dentry = dentry;
469 args.op = EXT4_FC_TAG_CREAT;
470
Harshad Shirwadkara80f7fc2020-11-05 19:58:53 -0800471 ret = ext4_fc_track_template(handle, inode, __track_dentry_update,
Harshad Shirwadkaraa75f4d2020-10-15 13:37:57 -0700472 (void *)&args, 0);
473 trace_ext4_fc_track_create(inode, dentry, ret);
474}
475
476/* __track_fn for inode tracking */
477static int __track_inode(struct inode *inode, void *arg, bool update)
478{
479 if (update)
480 return -EEXIST;
481
482 EXT4_I(inode)->i_fc_lblk_len = 0;
483
484 return 0;
485}
486
Harshad Shirwadkara80f7fc2020-11-05 19:58:53 -0800487void ext4_fc_track_inode(handle_t *handle, struct inode *inode)
Harshad Shirwadkaraa75f4d2020-10-15 13:37:57 -0700488{
489 int ret;
490
491 if (S_ISDIR(inode->i_mode))
492 return;
493
Harshad Shirwadkara80f7fc2020-11-05 19:58:53 -0800494 ret = ext4_fc_track_template(handle, inode, __track_inode, NULL, 1);
Harshad Shirwadkaraa75f4d2020-10-15 13:37:57 -0700495 trace_ext4_fc_track_inode(inode, ret);
496}
497
/* Inclusive logical block range [start, end] handed to __track_range(). */
struct __track_range_args {
	ext4_lblk_t start, end;
};
501
502/* __track_fn for tracking data updates */
503static int __track_range(struct inode *inode, void *arg, bool update)
504{
505 struct ext4_inode_info *ei = EXT4_I(inode);
506 ext4_lblk_t oldstart;
507 struct __track_range_args *__arg =
508 (struct __track_range_args *)arg;
509
510 if (inode->i_ino < EXT4_FIRST_INO(inode->i_sb)) {
511 ext4_debug("Special inode %ld being modified\n", inode->i_ino);
512 return -ECANCELED;
513 }
514
515 oldstart = ei->i_fc_lblk_start;
516
517 if (update && ei->i_fc_lblk_len > 0) {
518 ei->i_fc_lblk_start = min(ei->i_fc_lblk_start, __arg->start);
519 ei->i_fc_lblk_len =
520 max(oldstart + ei->i_fc_lblk_len - 1, __arg->end) -
521 ei->i_fc_lblk_start + 1;
522 } else {
523 ei->i_fc_lblk_start = __arg->start;
524 ei->i_fc_lblk_len = __arg->end - __arg->start + 1;
525 }
526
527 return 0;
528}
529
Harshad Shirwadkara80f7fc2020-11-05 19:58:53 -0800530void ext4_fc_track_range(handle_t *handle, struct inode *inode, ext4_lblk_t start,
Harshad Shirwadkaraa75f4d2020-10-15 13:37:57 -0700531 ext4_lblk_t end)
532{
533 struct __track_range_args args;
534 int ret;
535
536 if (S_ISDIR(inode->i_mode))
537 return;
538
539 args.start = start;
540 args.end = end;
541
Harshad Shirwadkara80f7fc2020-11-05 19:58:53 -0800542 ret = ext4_fc_track_template(handle, inode, __track_range, &args, 1);
Harshad Shirwadkaraa75f4d2020-10-15 13:37:57 -0700543
544 trace_ext4_fc_track_range(inode, start, end, ret);
545}
546
/*
 * Submit the current fast commit buffer for write-out.  The write is
 * synchronous (REQ_SYNC) and, when barriers are enabled, ordered with a
 * preflush and FUA.
 */
static void ext4_fc_submit_bh(struct super_block *sb)
{
	int write_flags = REQ_SYNC;
	struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh;

	if (test_opt(sb, BARRIER))
		write_flags |= REQ_FUA | REQ_PREFLUSH;
	lock_buffer(bh);
	clear_buffer_dirty(bh);
	set_buffer_uptodate(bh);
	/* ext4_end_buffer_io_sync() records the result and unlocks bh. */
	bh->b_end_io = ext4_end_buffer_io_sync;
	submit_bh(REQ_OP_WRITE, write_flags, bh);
	/* A fresh buffer is obtained from jbd2 for the next reservation. */
	EXT4_SB(sb)->s_fc_bh = NULL;
}
561
562/* Ext4 commit path routines */
563
564/* memzero and update CRC */
565static void *ext4_fc_memzero(struct super_block *sb, void *dst, int len,
566 u32 *crc)
567{
568 void *ret;
569
570 ret = memset(dst, 0, len);
571 if (crc)
572 *crc = ext4_chksum(EXT4_SB(sb), *crc, dst, len);
573 return ret;
574}
575
576/*
577 * Allocate len bytes on a fast commit buffer.
578 *
579 * During the commit time this function is used to manage fast commit
580 * block space. We don't split a fast commit log onto different
581 * blocks. So this function makes sure that if there's not enough space
582 * on the current block, the remaining space in the current block is
583 * marked as unused by adding EXT4_FC_TAG_PAD tag. In that case,
584 * new block is from jbd2 and CRC is updated to reflect the padding
585 * we added.
586 */
static u8 *ext4_fc_reserve_space(struct super_block *sb, int len, u32 *crc)
{
	struct ext4_fc_tl *tl;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct buffer_head *bh;
	int bsize = sbi->s_journal->j_blocksize;
	int ret, off = sbi->s_fc_bytes % bsize;
	int pad_len;

	/*
	 * After allocating len, we should have space at least for a 0 byte
	 * padding.
	 */
	if (len + sizeof(struct ext4_fc_tl) > bsize)
		return NULL;

	if (bsize - off - 1 > len + sizeof(struct ext4_fc_tl)) {
		/*
		 * Only allocate from current buffer if we have enough space for
		 * this request AND we have space to add a zero byte padding.
		 */
		if (!sbi->s_fc_bh) {
			ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
			if (ret)
				return NULL;
			sbi->s_fc_bh = bh;
		}
		sbi->s_fc_bytes += len;
		return sbi->s_fc_bh->b_data + off;
	}
	/* Need to add PAD tag */
	tl = (struct ext4_fc_tl *)(sbi->s_fc_bh->b_data + off);
	tl->fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD);
	pad_len = bsize - off - 1 - sizeof(struct ext4_fc_tl);
	tl->fc_len = cpu_to_le16(pad_len);
	/* Fold the PAD header and zeroed filler into the running CRC. */
	if (crc)
		*crc = ext4_chksum(sbi, *crc, tl, sizeof(*tl));
	if (pad_len > 0)
		ext4_fc_memzero(sb, tl + 1, pad_len, crc);
	/* Flush the padded block and continue on a fresh jbd2 block. */
	ext4_fc_submit_bh(sb);

	ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
	if (ret)
		return NULL;
	sbi->s_fc_bh = bh;
	/* Account the skipped tail of the old block plus this allocation. */
	sbi->s_fc_bytes = (sbi->s_fc_bytes / bsize + 1) * bsize + len;
	return sbi->s_fc_bh->b_data;
}
635
636/* memcpy to fc reserved space and update CRC */
637static void *ext4_fc_memcpy(struct super_block *sb, void *dst, const void *src,
638 int len, u32 *crc)
639{
640 if (crc)
641 *crc = ext4_chksum(EXT4_SB(sb), *crc, src, len);
642 return memcpy(dst, src, len);
643}
644
645/*
646 * Complete a fast commit by writing tail tag.
647 *
648 * Writing tail tag marks the end of a fast commit. In order to guarantee
649 * atomicity, after writing tail tag, even if there's space remaining
650 * in the block, next commit shouldn't use it. That's why tail tag
651 * has the length as that of the remaining space on the block.
652 */
static int ext4_fc_write_tail(struct super_block *sb, u32 crc)
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_fc_tl tl;
	struct ext4_fc_tail tail;
	int off, bsize = sbi->s_journal->j_blocksize;
	u8 *dst;

	/*
	 * ext4_fc_reserve_space takes care of allocating an extra block if
	 * there's no enough space on this block for accommodating this tail.
	 */
	dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(tail), &crc);
	if (!dst)
		return -ENOSPC;

	off = sbi->s_fc_bytes % bsize;

	tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_TAIL);
	/* Claim the rest of the block so the next commit cannot reuse it. */
	tl.fc_len = cpu_to_le16(bsize - off - 1 + sizeof(struct ext4_fc_tail));
	sbi->s_fc_bytes = round_up(sbi->s_fc_bytes, bsize);

	ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), &crc);
	dst += sizeof(tl);
	tail.fc_tid = cpu_to_le32(sbi->s_journal->j_running_transaction->t_tid);
	ext4_fc_memcpy(sb, dst, &tail.fc_tid, sizeof(tail.fc_tid), &crc);
	dst += sizeof(tail.fc_tid);
	/* The CRC covers everything up to, but not including, itself. */
	tail.fc_crc = cpu_to_le32(crc);
	ext4_fc_memcpy(sb, dst, &tail.fc_crc, sizeof(tail.fc_crc), NULL);

	ext4_fc_submit_bh(sb);

	return 0;
}
687
688/*
689 * Adds tag, length, value and updates CRC. Returns true if tlv was added.
690 * Returns false if there's not enough space.
691 */
692static bool ext4_fc_add_tlv(struct super_block *sb, u16 tag, u16 len, u8 *val,
693 u32 *crc)
694{
695 struct ext4_fc_tl tl;
696 u8 *dst;
697
698 dst = ext4_fc_reserve_space(sb, sizeof(tl) + len, crc);
699 if (!dst)
700 return false;
701
702 tl.fc_tag = cpu_to_le16(tag);
703 tl.fc_len = cpu_to_le16(len);
704
705 ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc);
706 ext4_fc_memcpy(sb, dst + sizeof(tl), val, len, crc);
707
708 return true;
709}
710
711/* Same as above, but adds dentry tlv. */
712static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u16 tag,
713 int parent_ino, int ino, int dlen,
714 const unsigned char *dname,
715 u32 *crc)
716{
717 struct ext4_fc_dentry_info fcd;
718 struct ext4_fc_tl tl;
719 u8 *dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(fcd) + dlen,
720 crc);
721
722 if (!dst)
723 return false;
724
725 fcd.fc_parent_ino = cpu_to_le32(parent_ino);
726 fcd.fc_ino = cpu_to_le32(ino);
727 tl.fc_tag = cpu_to_le16(tag);
728 tl.fc_len = cpu_to_le16(sizeof(fcd) + dlen);
729 ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc);
730 dst += sizeof(tl);
731 ext4_fc_memcpy(sb, dst, &fcd, sizeof(fcd), crc);
732 dst += sizeof(fcd);
733 ext4_fc_memcpy(sb, dst, dname, dlen, crc);
734 dst += dlen;
735
736 return true;
737}
738
739/*
740 * Writes inode in the fast commit space under TLV with tag @tag.
741 * Returns 0 on success, error on failure.
742 */
743static int ext4_fc_write_inode(struct inode *inode, u32 *crc)
744{
745 struct ext4_inode_info *ei = EXT4_I(inode);
746 int inode_len = EXT4_GOOD_OLD_INODE_SIZE;
747 int ret;
748 struct ext4_iloc iloc;
749 struct ext4_fc_inode fc_inode;
750 struct ext4_fc_tl tl;
751 u8 *dst;
752
753 ret = ext4_get_inode_loc(inode, &iloc);
754 if (ret)
755 return ret;
756
757 if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE)
758 inode_len += ei->i_extra_isize;
759
760 fc_inode.fc_ino = cpu_to_le32(inode->i_ino);
761 tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_INODE);
762 tl.fc_len = cpu_to_le16(inode_len + sizeof(fc_inode.fc_ino));
763
764 dst = ext4_fc_reserve_space(inode->i_sb,
765 sizeof(tl) + inode_len + sizeof(fc_inode.fc_ino), crc);
766 if (!dst)
767 return -ECANCELED;
768
769 if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, sizeof(tl), crc))
770 return -ECANCELED;
771 dst += sizeof(tl);
772 if (!ext4_fc_memcpy(inode->i_sb, dst, &fc_inode, sizeof(fc_inode), crc))
773 return -ECANCELED;
774 dst += sizeof(fc_inode);
775 if (!ext4_fc_memcpy(inode->i_sb, dst, (u8 *)ext4_raw_inode(&iloc),
776 inode_len, crc))
777 return -ECANCELED;
778
779 return 0;
780}
781
/*
 * Writes updated data ranges for the inode in question. Updates CRC.
 * Returns 0 on success, error otherwise.
 */
static int ext4_fc_write_inode_data(struct inode *inode, u32 *crc)
{
	ext4_lblk_t old_blk_size, cur_lblk_off, new_blk_size;
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_map_blocks map;
	struct ext4_fc_add_range fc_ext;
	struct ext4_fc_del_range lrange;
	struct ext4_extent *ex;
	int ret;

	/* Snapshot the tracked range and reset it, all under i_fc_lock. */
	mutex_lock(&ei->i_fc_lock);
	if (ei->i_fc_lblk_len == 0) {
		mutex_unlock(&ei->i_fc_lock);
		return 0;
	}
	old_blk_size = ei->i_fc_lblk_start;
	new_blk_size = ei->i_fc_lblk_start + ei->i_fc_lblk_len - 1;
	ei->i_fc_lblk_len = 0;
	mutex_unlock(&ei->i_fc_lock);

	cur_lblk_off = old_blk_size;
	jbd_debug(1, "%s: will try writing %d to %d for inode %ld\n",
		  __func__, cur_lblk_off, new_blk_size, inode->i_ino);

	/* Walk the mapped and unmapped extents covering the tracked range. */
	while (cur_lblk_off <= new_blk_size) {
		map.m_lblk = cur_lblk_off;
		map.m_len = new_blk_size - cur_lblk_off + 1;
		ret = ext4_map_blocks(NULL, inode, &map, 0);
		if (ret < 0)
			return -ECANCELED;

		if (map.m_len == 0) {
			cur_lblk_off++;
			continue;
		}

		if (ret == 0) {
			/* Unmapped region: record it as a deleted range. */
			lrange.fc_ino = cpu_to_le32(inode->i_ino);
			lrange.fc_lblk = cpu_to_le32(map.m_lblk);
			lrange.fc_len = cpu_to_le32(map.m_len);
			if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_DEL_RANGE,
					    sizeof(lrange), (u8 *)&lrange, crc))
				return -ENOSPC;
		} else {
			/* Mapped extent: record it as an added range. */
			fc_ext.fc_ino = cpu_to_le32(inode->i_ino);
			ex = (struct ext4_extent *)&fc_ext.fc_ex;
			ex->ee_block = cpu_to_le32(map.m_lblk);
			ex->ee_len = cpu_to_le16(map.m_len);
			ext4_ext_store_pblock(ex, map.m_pblk);
			if (map.m_flags & EXT4_MAP_UNWRITTEN)
				ext4_ext_mark_unwritten(ex);
			else
				ext4_ext_mark_initialized(ex);
			if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_ADD_RANGE,
					    sizeof(fc_ext), (u8 *)&fc_ext, crc))
				return -ENOSPC;
		}

		cur_lblk_off += map.m_len;
	}

	return 0;
}
849
850
/* Submit data for all the fast commit inodes */
static int ext4_fc_submit_inode_data_all(journal_t *journal)
{
	struct super_block *sb = (struct super_block *)(journal->j_private);
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_inode_info *ei;
	struct list_head *pos;
	int ret = 0;

	spin_lock(&sbi->s_fc_lock);
	/* From here, newly tracked updates are diverted to staging queues. */
	sbi->s_mount_flags |= EXT4_MF_FC_COMMITTING;
	list_for_each(pos, &sbi->s_fc_q[FC_Q_MAIN]) {
		ei = list_entry(pos, struct ext4_inode_info, i_fc_list);
		ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING);
		/* Wait for any in-flight ext4_fc_start_update() sections. */
		while (atomic_read(&ei->i_fc_updates)) {
			DEFINE_WAIT(wait);

			prepare_to_wait(&ei->i_fc_wait, &wait,
						TASK_UNINTERRUPTIBLE);
			/* Recheck after arming the wait to avoid a lost wakeup. */
			if (atomic_read(&ei->i_fc_updates)) {
				spin_unlock(&sbi->s_fc_lock);
				schedule();
				spin_lock(&sbi->s_fc_lock);
			}
			finish_wait(&ei->i_fc_wait, &wait);
		}
		/* Drop the spinlock; submitting inode data may sleep. */
		spin_unlock(&sbi->s_fc_lock);
		ret = jbd2_submit_inode_data(ei->jinode);
		if (ret)
			return ret;
		spin_lock(&sbi->s_fc_lock);
	}
	spin_unlock(&sbi->s_fc_lock);

	return ret;
}
887
/* Wait for completion of data for all the fast commit inodes */
static int ext4_fc_wait_inode_data_all(journal_t *journal)
{
	struct super_block *sb = (struct super_block *)(journal->j_private);
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_inode_info *pos, *n;
	int ret = 0;

	spin_lock(&sbi->s_fc_lock);
	list_for_each_entry_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
		/* Only inodes marked COMMITTING had their data submitted. */
		if (!ext4_test_inode_state(&pos->vfs_inode,
					   EXT4_STATE_FC_COMMITTING))
			continue;
		/* Drop the spinlock; waiting for I/O sleeps. */
		spin_unlock(&sbi->s_fc_lock);

		ret = jbd2_wait_inode_data(journal, pos->jinode);
		if (ret)
			return ret;
		spin_lock(&sbi->s_fc_lock);
	}
	spin_unlock(&sbi->s_fc_lock);

	return 0;
}
912
/*
 * Commit all the directory entry updates.
 *
 * Walks the main dentry update queue and emits a dentry TLV for each
 * queued operation, accumulating the running commit CRC in *crc. For
 * EXT4_FC_TAG_CREAT, the referenced inode and its data ranges are
 * written out first (see the comment inside) so replay can create the
 * inode before linking it.
 *
 * Called with sbi->s_fc_lock held; the lock is dropped around the block
 * writes and re-taken before returning, on both the success and the
 * error path.
 *
 * NOTE(review): the queue is walked with list_for_each_safe() while
 * s_fc_lock is repeatedly dropped for I/O; this presumably relies on
 * concurrent trackers only appending to the staging queue while a
 * commit is running — confirm against the ext4_fc_track_* callers.
 */
static int ext4_fc_commit_dentry_updates(journal_t *journal, u32 *crc)
{
	struct super_block *sb = (struct super_block *)(journal->j_private);
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_fc_dentry_update *fc_dentry;
	struct inode *inode;
	struct list_head *pos, *n, *fcd_pos, *fcd_n;
	struct ext4_inode_info *ei;
	int ret;

	if (list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN]))
		return 0;
	list_for_each_safe(fcd_pos, fcd_n, &sbi->s_fc_dentry_q[FC_Q_MAIN]) {
		fc_dentry = list_entry(fcd_pos, struct ext4_fc_dentry_update,
				       fcd_list);
		if (fc_dentry->fcd_op != EXT4_FC_TAG_CREAT) {
			/* Link/unlink need only the dentry TLV itself. */
			spin_unlock(&sbi->s_fc_lock);
			if (!ext4_fc_add_dentry_tlv(
				sb, fc_dentry->fcd_op,
				fc_dentry->fcd_parent, fc_dentry->fcd_ino,
				fc_dentry->fcd_name.len,
				fc_dentry->fcd_name.name, crc)) {
				ret = -ENOSPC;
				goto lock_and_exit;
			}
			spin_lock(&sbi->s_fc_lock);
			continue;
		}

		/* CREAT: locate the tracked inode this dentry refers to. */
		inode = NULL;
		list_for_each_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN]) {
			ei = list_entry(pos, struct ext4_inode_info, i_fc_list);
			if (ei->vfs_inode.i_ino == fc_dentry->fcd_ino) {
				inode = &ei->vfs_inode;
				break;
			}
		}
		/*
		 * If we don't find inode in our list, then it was deleted,
		 * in which case, we don't need to record it's create tag.
		 */
		if (!inode)
			continue;
		spin_unlock(&sbi->s_fc_lock);

		/*
		 * We first write the inode and then the create dirent. This
		 * allows the recovery code to create an unnamed inode first
		 * and then link it to a directory entry. This allows us
		 * to use namei.c routines almost as is and simplifies
		 * the recovery code.
		 */
		ret = ext4_fc_write_inode(inode, crc);
		if (ret)
			goto lock_and_exit;

		ret = ext4_fc_write_inode_data(inode, crc);
		if (ret)
			goto lock_and_exit;

		if (!ext4_fc_add_dentry_tlv(
			sb, fc_dentry->fcd_op,
			fc_dentry->fcd_parent, fc_dentry->fcd_ino,
			fc_dentry->fcd_name.len,
			fc_dentry->fcd_name.name, crc)) {
			ret = -ENOSPC;
			goto lock_and_exit;
		}

		spin_lock(&sbi->s_fc_lock);
	}
	return 0;
lock_and_exit:
	/* Callers expect s_fc_lock held on return, even on error. */
	spin_lock(&sbi->s_fc_lock);
	return ret;
}
990
/*
 * Perform one fast commit.
 *
 * Steps, in order:
 *  1. Submit and wait for data writeback of all tracked inodes.
 *  2. If this is the first fast commit of the running transaction
 *     (s_fc_bytes == 0), emit an EXT4_FC_TAG_HEAD TLV.
 *  3. Emit all queued dentry updates (plus inodes for creates).
 *  4. Emit inode + data-range TLVs for the remaining committing inodes.
 *  5. Emit the tail TLV carrying the accumulated CRC.
 *
 * Block writes are batched under a blk plug. Returns 0 on success or a
 * negative error, upon which the caller falls back to a full commit.
 */
static int ext4_fc_perform_commit(journal_t *journal)
{
	struct super_block *sb = (struct super_block *)(journal->j_private);
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_inode_info *iter;
	struct ext4_fc_head head;
	struct list_head *pos;
	struct inode *inode;
	struct blk_plug plug;
	int ret = 0;
	u32 crc = 0;

	ret = ext4_fc_submit_inode_data_all(journal);
	if (ret)
		return ret;

	ret = ext4_fc_wait_inode_data_all(journal);
	if (ret)
		return ret;

	blk_start_plug(&plug);
	if (sbi->s_fc_bytes == 0) {
		/*
		 * Add a head tag only if this is the first fast commit
		 * in this TID.
		 */
		head.fc_features = cpu_to_le32(EXT4_FC_SUPPORTED_FEATURES);
		head.fc_tid = cpu_to_le32(
			sbi->s_journal->j_running_transaction->t_tid);
		/*
		 * NOTE(review): this failure path leaves ret == 0, so the
		 * caller sees success without a head tag having been
		 * written — presumably tolerated because no tail follows
		 * either; confirm.
		 */
		if (!ext4_fc_add_tlv(sb, EXT4_FC_TAG_HEAD, sizeof(head),
			(u8 *)&head, &crc))
			goto out;
	}

	spin_lock(&sbi->s_fc_lock);
	ret = ext4_fc_commit_dentry_updates(journal, &crc);
	if (ret) {
		spin_unlock(&sbi->s_fc_lock);
		goto out;
	}

	/* Now write inode (+ data ranges) for every committing inode. */
	list_for_each(pos, &sbi->s_fc_q[FC_Q_MAIN]) {
		iter = list_entry(pos, struct ext4_inode_info, i_fc_list);
		inode = &iter->vfs_inode;
		if (!ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING))
			continue;

		spin_unlock(&sbi->s_fc_lock);
		ret = ext4_fc_write_inode_data(inode, &crc);
		if (ret)
			goto out;
		ret = ext4_fc_write_inode(inode, &crc);
		if (ret)
			goto out;
		spin_lock(&sbi->s_fc_lock);
		/* Remember which fast commit this inode was flushed in. */
		EXT4_I(inode)->i_fc_committed_subtid =
			atomic_read(&sbi->s_fc_subtid);
	}
	spin_unlock(&sbi->s_fc_lock);

	ret = ext4_fc_write_tail(sb, crc);

out:
	blk_finish_plug(&plug);
	return ret;
}
1057
1058/*
1059 * The main commit entry point. Performs a fast commit for transaction
1060 * commit_tid if needed. If it's not possible to perform a fast commit
1061 * due to various reasons, we fall back to full commit. Returns 0
1062 * on success, error otherwise.
1063 */
1064int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
1065{
1066 struct super_block *sb = (struct super_block *)(journal->j_private);
1067 struct ext4_sb_info *sbi = EXT4_SB(sb);
1068 int nblks = 0, ret, bsize = journal->j_blocksize;
1069 int subtid = atomic_read(&sbi->s_fc_subtid);
1070 int reason = EXT4_FC_REASON_OK, fc_bufs_before = 0;
1071 ktime_t start_time, commit_time;
1072
1073 trace_ext4_fc_commit_start(sb);
1074
1075 start_time = ktime_get();
1076
1077 if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
1078 (ext4_fc_is_ineligible(sb))) {
1079 reason = EXT4_FC_REASON_INELIGIBLE;
1080 goto out;
1081 }
1082
1083restart_fc:
1084 ret = jbd2_fc_begin_commit(journal, commit_tid);
1085 if (ret == -EALREADY) {
1086 /* There was an ongoing commit, check if we need to restart */
1087 if (atomic_read(&sbi->s_fc_subtid) <= subtid &&
1088 commit_tid > journal->j_commit_sequence)
1089 goto restart_fc;
1090 reason = EXT4_FC_REASON_ALREADY_COMMITTED;
1091 goto out;
1092 } else if (ret) {
1093 sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
1094 reason = EXT4_FC_REASON_FC_START_FAILED;
1095 goto out;
1096 }
1097
1098 fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize;
1099 ret = ext4_fc_perform_commit(journal);
1100 if (ret < 0) {
1101 sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
1102 reason = EXT4_FC_REASON_FC_FAILED;
1103 goto out;
1104 }
1105 nblks = (sbi->s_fc_bytes + bsize - 1) / bsize - fc_bufs_before;
1106 ret = jbd2_fc_wait_bufs(journal, nblks);
1107 if (ret < 0) {
1108 sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
1109 reason = EXT4_FC_REASON_FC_FAILED;
1110 goto out;
1111 }
1112 atomic_inc(&sbi->s_fc_subtid);
1113 jbd2_fc_end_commit(journal);
1114out:
1115 /* Has any ineligible update happened since we started? */
1116 if (reason == EXT4_FC_REASON_OK && ext4_fc_is_ineligible(sb)) {
1117 sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
1118 reason = EXT4_FC_REASON_INELIGIBLE;
1119 }
1120
1121 spin_lock(&sbi->s_fc_lock);
1122 if (reason != EXT4_FC_REASON_OK &&
1123 reason != EXT4_FC_REASON_ALREADY_COMMITTED) {
1124 sbi->s_fc_stats.fc_ineligible_commits++;
1125 } else {
1126 sbi->s_fc_stats.fc_num_commits++;
1127 sbi->s_fc_stats.fc_numblks += nblks;
1128 }
1129 spin_unlock(&sbi->s_fc_lock);
1130 nblks = (reason == EXT4_FC_REASON_OK) ? nblks : 0;
1131 trace_ext4_fc_commit_stop(sb, nblks, reason);
1132 commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
1133 /*
1134 * weight the commit time higher than the average time so we don't
1135 * react too strongly to vast changes in the commit time
1136 */
1137 if (likely(sbi->s_fc_avg_commit_time))
1138 sbi->s_fc_avg_commit_time = (commit_time +
1139 sbi->s_fc_avg_commit_time * 3) / 4;
1140 else
1141 sbi->s_fc_avg_commit_time = commit_time;
1142 jbd_debug(1,
1143 "Fast commit ended with blks = %d, reason = %d, subtid - %d",
1144 nblks, reason, subtid);
1145 if (reason == EXT4_FC_REASON_FC_FAILED)
Harshad Shirwadkar0bce5772020-11-05 19:58:58 -08001146 return jbd2_fc_end_commit_fallback(journal);
Harshad Shirwadkaraa75f4d2020-10-15 13:37:57 -07001147 if (reason == EXT4_FC_REASON_FC_START_FAILED ||
1148 reason == EXT4_FC_REASON_INELIGIBLE)
1149 return jbd2_complete_transaction(journal, commit_tid);
1150 return 0;
1151}
1152
Harshad Shirwadkarff780b92020-10-15 13:37:56 -07001153/*
1154 * Fast commit cleanup routine. This is called after every fast commit and
1155 * full commit. full is true if we are called after a full commit.
1156 */
1157static void ext4_fc_cleanup(journal_t *journal, int full)
1158{
Harshad Shirwadkaraa75f4d2020-10-15 13:37:57 -07001159 struct super_block *sb = journal->j_private;
1160 struct ext4_sb_info *sbi = EXT4_SB(sb);
1161 struct ext4_inode_info *iter;
1162 struct ext4_fc_dentry_update *fc_dentry;
1163 struct list_head *pos, *n;
1164
1165 if (full && sbi->s_fc_bh)
1166 sbi->s_fc_bh = NULL;
1167
1168 jbd2_fc_release_bufs(journal);
1169
1170 spin_lock(&sbi->s_fc_lock);
1171 list_for_each_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN]) {
1172 iter = list_entry(pos, struct ext4_inode_info, i_fc_list);
1173 list_del_init(&iter->i_fc_list);
1174 ext4_clear_inode_state(&iter->vfs_inode,
1175 EXT4_STATE_FC_COMMITTING);
1176 ext4_fc_reset_inode(&iter->vfs_inode);
1177 /* Make sure EXT4_STATE_FC_COMMITTING bit is clear */
1178 smp_mb();
1179#if (BITS_PER_LONG < 64)
1180 wake_up_bit(&iter->i_state_flags, EXT4_STATE_FC_COMMITTING);
1181#else
1182 wake_up_bit(&iter->i_flags, EXT4_STATE_FC_COMMITTING);
1183#endif
1184 }
1185
1186 while (!list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) {
1187 fc_dentry = list_first_entry(&sbi->s_fc_dentry_q[FC_Q_MAIN],
1188 struct ext4_fc_dentry_update,
1189 fcd_list);
1190 list_del_init(&fc_dentry->fcd_list);
1191 spin_unlock(&sbi->s_fc_lock);
1192
1193 if (fc_dentry->fcd_name.name &&
1194 fc_dentry->fcd_name.len > DNAME_INLINE_LEN)
1195 kfree(fc_dentry->fcd_name.name);
1196 kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry);
1197 spin_lock(&sbi->s_fc_lock);
1198 }
1199
1200 list_splice_init(&sbi->s_fc_dentry_q[FC_Q_STAGING],
1201 &sbi->s_fc_dentry_q[FC_Q_MAIN]);
1202 list_splice_init(&sbi->s_fc_q[FC_Q_STAGING],
1203 &sbi->s_fc_q[FC_Q_STAGING]);
1204
Harshad Shirwadkarababea72020-10-26 21:49:15 -07001205 sbi->s_mount_flags &= ~EXT4_MF_FC_COMMITTING;
1206 sbi->s_mount_flags &= ~EXT4_MF_FC_INELIGIBLE;
Harshad Shirwadkaraa75f4d2020-10-15 13:37:57 -07001207
1208 if (full)
1209 sbi->s_fc_bytes = 0;
1210 spin_unlock(&sbi->s_fc_lock);
1211 trace_ext4_fc_stats(sb);
Harshad Shirwadkarff780b92020-10-15 13:37:56 -07001212}
Harshad Shirwadkar6866d7b2020-10-15 13:37:55 -07001213
Harshad Shirwadkar8016e292020-10-15 13:37:59 -07001214/* Ext4 Replay Path Routines */
1215
1216/* Get length of a particular tlv */
1217static inline int ext4_fc_tag_len(struct ext4_fc_tl *tl)
1218{
1219 return le16_to_cpu(tl->fc_len);
1220}
1221
1222/* Get a pointer to "value" of a tlv */
1223static inline u8 *ext4_fc_tag_val(struct ext4_fc_tl *tl)
1224{
1225 return (u8 *)tl + sizeof(*tl);
1226}
1227
/*
 * Helper struct for dentry replay routines: the decoded (host-endian)
 * form of an on-disk struct ext4_fc_dentry_info, filled by tl_to_darg().
 */
struct dentry_info_args {
	/*
	 * parent_ino: inode number of the parent directory;
	 * dname_len: length of dname in bytes;
	 * ino: inode number the dentry refers to;
	 * inode_len: not set by tl_to_darg() — presumably used by other
	 * callers outside this view; confirm before relying on it.
	 */
	int parent_ino, dname_len, ino, inode_len;
	/* Points into the TLV buffer; bounded by dname_len, and
	 * NOTE(review): presumably not NUL-terminated — confirm. */
	char *dname;
};
1233
1234static inline void tl_to_darg(struct dentry_info_args *darg,
1235 struct ext4_fc_tl *tl)
1236{
1237 struct ext4_fc_dentry_info *fcd;
1238
1239 fcd = (struct ext4_fc_dentry_info *)ext4_fc_tag_val(tl);
1240
1241 darg->parent_ino = le32_to_cpu(fcd->fc_parent_ino);
1242 darg->ino = le32_to_cpu(fcd->fc_ino);
1243 darg->dname = fcd->fc_dname;
1244 darg->dname_len = ext4_fc_tag_len(tl) -
1245 sizeof(struct ext4_fc_dentry_info);
1246}
1247
/*
 * Unlink replay function.
 *
 * Re-applies an EXT4_FC_TAG_UNLINK TLV by removing the logged entry from
 * its parent directory via __ext4_unlink(). A missing inode or parent is
 * not an error — later tags may have removed them — so those cases (and
 * -ENOENT from the unlink itself) return 0 and replay continues.
 */
static int ext4_fc_replay_unlink(struct super_block *sb, struct ext4_fc_tl *tl)
{
	struct inode *inode, *old_parent;
	struct qstr entry;
	struct dentry_info_args darg;
	int ret = 0;

	tl_to_darg(&darg, tl);

	trace_ext4_fc_replay(sb, EXT4_FC_TAG_UNLINK, darg.ino,
			darg.parent_ino, darg.dname_len);

	/* entry.name aliases the TLV buffer — valid only for this call */
	entry.name = darg.dname;
	entry.len = darg.dname_len;
	inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);

	if (IS_ERR_OR_NULL(inode)) {
		jbd_debug(1, "Inode %d not found", darg.ino);
		return 0;
	}

	old_parent = ext4_iget(sb, darg.parent_ino,
			EXT4_IGET_NORMAL);
	if (IS_ERR_OR_NULL(old_parent)) {
		jbd_debug(1, "Dir with inode %d not found", darg.parent_ino);
		iput(inode);
		return 0;
	}

	ret = __ext4_unlink(NULL, old_parent, &entry, inode);
	/* -ENOENT ok coz it might not exist anymore. */
	if (ret == -ENOENT)
		ret = 0;
	iput(old_parent);
	iput(inode);
	return ret;
}
1286
/*
 * Create the directory entry described by @darg pointing at @inode.
 *
 * Shared by link and create replay. Builds a temporary dentry pair (an
 * alias for the parent plus a child dentry) solely so __ext4_link() can
 * be reused, then drops both again. -EEXIST from the link is tolerated:
 * the entry may already have been persisted before the crash, or this
 * tag may have been replayed once already.
 *
 * Returns 0 on success and when the parent is missing (replay goes on);
 * a negative error only for allocation/link failures.
 */
static int ext4_fc_replay_link_internal(struct super_block *sb,
				struct dentry_info_args *darg,
				struct inode *inode)
{
	struct inode *dir = NULL;
	struct dentry *dentry_dir = NULL, *dentry_inode = NULL;
	struct qstr qstr_dname = QSTR_INIT(darg->dname, darg->dname_len);
	int ret = 0;

	dir = ext4_iget(sb, darg->parent_ino, EXT4_IGET_NORMAL);
	if (IS_ERR(dir)) {
		jbd_debug(1, "Dir with inode %d not found.", darg->parent_ino);
		dir = NULL;
		goto out;
	}

	dentry_dir = d_obtain_alias(dir);
	if (IS_ERR(dentry_dir)) {
		jbd_debug(1, "Failed to obtain dentry");
		dentry_dir = NULL;
		goto out;
	}

	dentry_inode = d_alloc(dentry_dir, &qstr_dname);
	if (!dentry_inode) {
		jbd_debug(1, "Inode dentry not created.");
		ret = -ENOMEM;
		goto out;
	}

	ret = __ext4_link(dir, inode, dentry_inode);
	/*
	 * It's possible that link already existed since data blocks
	 * for the dir in question got persisted before we crashed OR
	 * we replayed this tag and crashed before the entire replay
	 * could complete.
	 */
	if (ret && ret != -EEXIST) {
		jbd_debug(1, "Failed to link\n");
		goto out;
	}

	ret = 0;
out:
	/*
	 * d_obtain_alias() took over the reference to dir, so iput(dir)
	 * directly only when no alias dentry was obtained.
	 */
	if (dentry_dir) {
		d_drop(dentry_dir);
		dput(dentry_dir);
	} else if (dir) {
		iput(dir);
	}
	if (dentry_inode) {
		d_drop(dentry_inode);
		dput(dentry_inode);
	}

	return ret;
}
1344
1345/* Link replay function */
1346static int ext4_fc_replay_link(struct super_block *sb, struct ext4_fc_tl *tl)
1347{
1348 struct inode *inode;
1349 struct dentry_info_args darg;
1350 int ret = 0;
1351
1352 tl_to_darg(&darg, tl);
1353 trace_ext4_fc_replay(sb, EXT4_FC_TAG_LINK, darg.ino,
1354 darg.parent_ino, darg.dname_len);
1355
1356 inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
1357 if (IS_ERR_OR_NULL(inode)) {
1358 jbd_debug(1, "Inode not found.");
1359 return 0;
1360 }
1361
1362 ret = ext4_fc_replay_link_internal(sb, &darg, inode);
1363 iput(inode);
1364 return ret;
1365}
1366
1367/*
1368 * Record all the modified inodes during replay. We use this later to setup
1369 * block bitmaps correctly.
1370 */
1371static int ext4_fc_record_modified_inode(struct super_block *sb, int ino)
1372{
1373 struct ext4_fc_replay_state *state;
1374 int i;
1375
1376 state = &EXT4_SB(sb)->s_fc_replay_state;
1377 for (i = 0; i < state->fc_modified_inodes_used; i++)
1378 if (state->fc_modified_inodes[i] == ino)
1379 return 0;
1380 if (state->fc_modified_inodes_used == state->fc_modified_inodes_size) {
1381 state->fc_modified_inodes_size +=
1382 EXT4_FC_REPLAY_REALLOC_INCREMENT;
1383 state->fc_modified_inodes = krealloc(
1384 state->fc_modified_inodes, sizeof(int) *
1385 state->fc_modified_inodes_size,
1386 GFP_KERNEL);
1387 if (!state->fc_modified_inodes)
1388 return -ENOMEM;
1389 }
1390 state->fc_modified_inodes[state->fc_modified_inodes_used++] = ino;
1391 return 0;
1392}
1393
1394/*
1395 * Inode replay function
1396 */
1397static int ext4_fc_replay_inode(struct super_block *sb, struct ext4_fc_tl *tl)
1398{
1399 struct ext4_fc_inode *fc_inode;
1400 struct ext4_inode *raw_inode;
1401 struct ext4_inode *raw_fc_inode;
1402 struct inode *inode = NULL;
1403 struct ext4_iloc iloc;
1404 int inode_len, ino, ret, tag = le16_to_cpu(tl->fc_tag);
1405 struct ext4_extent_header *eh;
1406
1407 fc_inode = (struct ext4_fc_inode *)ext4_fc_tag_val(tl);
1408
1409 ino = le32_to_cpu(fc_inode->fc_ino);
1410 trace_ext4_fc_replay(sb, tag, ino, 0, 0);
1411
1412 inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
1413 if (!IS_ERR_OR_NULL(inode)) {
1414 ext4_ext_clear_bb(inode);
1415 iput(inode);
1416 }
1417
1418 ext4_fc_record_modified_inode(sb, ino);
1419
1420 raw_fc_inode = (struct ext4_inode *)fc_inode->fc_raw_inode;
1421 ret = ext4_get_fc_inode_loc(sb, ino, &iloc);
1422 if (ret)
1423 goto out;
1424
1425 inode_len = ext4_fc_tag_len(tl) - sizeof(struct ext4_fc_inode);
1426 raw_inode = ext4_raw_inode(&iloc);
1427
1428 memcpy(raw_inode, raw_fc_inode, offsetof(struct ext4_inode, i_block));
1429 memcpy(&raw_inode->i_generation, &raw_fc_inode->i_generation,
1430 inode_len - offsetof(struct ext4_inode, i_generation));
1431 if (le32_to_cpu(raw_inode->i_flags) & EXT4_EXTENTS_FL) {
1432 eh = (struct ext4_extent_header *)(&raw_inode->i_block[0]);
1433 if (eh->eh_magic != EXT4_EXT_MAGIC) {
1434 memset(eh, 0, sizeof(*eh));
1435 eh->eh_magic = EXT4_EXT_MAGIC;
1436 eh->eh_max = cpu_to_le16(
1437 (sizeof(raw_inode->i_block) -
1438 sizeof(struct ext4_extent_header))
1439 / sizeof(struct ext4_extent));
1440 }
1441 } else if (le32_to_cpu(raw_inode->i_flags) & EXT4_INLINE_DATA_FL) {
1442 memcpy(raw_inode->i_block, raw_fc_inode->i_block,
1443 sizeof(raw_inode->i_block));
1444 }
1445
1446 /* Immediately update the inode on disk. */
1447 ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh);
1448 if (ret)
1449 goto out;
1450 ret = sync_dirty_buffer(iloc.bh);
1451 if (ret)
1452 goto out;
1453 ret = ext4_mark_inode_used(sb, ino);
1454 if (ret)
1455 goto out;
1456
1457 /* Given that we just wrote the inode on disk, this SHOULD succeed. */
1458 inode = ext4_iget(sb, ino, EXT4_IGET_NORMAL);
1459 if (IS_ERR_OR_NULL(inode)) {
1460 jbd_debug(1, "Inode not found.");
1461 return -EFSCORRUPTED;
1462 }
1463
1464 /*
1465 * Our allocator could have made different decisions than before
1466 * crashing. This should be fixed but until then, we calculate
1467 * the number of blocks the inode.
1468 */
1469 ext4_ext_replay_set_iblocks(inode);
1470
1471 inode->i_generation = le32_to_cpu(ext4_raw_inode(&iloc)->i_generation);
1472 ext4_reset_inode_seed(inode);
1473
1474 ext4_inode_csum_set(inode, ext4_raw_inode(&iloc), EXT4_I(inode));
1475 ret = ext4_handle_dirty_metadata(NULL, NULL, iloc.bh);
1476 sync_dirty_buffer(iloc.bh);
1477 brelse(iloc.bh);
1478out:
1479 iput(inode);
1480 if (!ret)
1481 blkdev_issue_flush(sb->s_bdev, GFP_KERNEL);
1482
1483 return 0;
1484}
1485
1486/*
1487 * Dentry create replay function.
1488 *
1489 * EXT4_FC_TAG_CREAT is preceded by EXT4_FC_TAG_INODE_FULL. Which means, the
1490 * inode for which we are trying to create a dentry here, should already have
1491 * been replayed before we start here.
1492 */
1493static int ext4_fc_replay_create(struct super_block *sb, struct ext4_fc_tl *tl)
1494{
1495 int ret = 0;
1496 struct inode *inode = NULL;
1497 struct inode *dir = NULL;
1498 struct dentry_info_args darg;
1499
1500 tl_to_darg(&darg, tl);
1501
1502 trace_ext4_fc_replay(sb, EXT4_FC_TAG_CREAT, darg.ino,
1503 darg.parent_ino, darg.dname_len);
1504
1505 /* This takes care of update group descriptor and other metadata */
1506 ret = ext4_mark_inode_used(sb, darg.ino);
1507 if (ret)
1508 goto out;
1509
1510 inode = ext4_iget(sb, darg.ino, EXT4_IGET_NORMAL);
1511 if (IS_ERR_OR_NULL(inode)) {
1512 jbd_debug(1, "inode %d not found.", darg.ino);
1513 inode = NULL;
1514 ret = -EINVAL;
1515 goto out;
1516 }
1517
1518 if (S_ISDIR(inode->i_mode)) {
1519 /*
1520 * If we are creating a directory, we need to make sure that the
1521 * dot and dot dot dirents are setup properly.
1522 */
1523 dir = ext4_iget(sb, darg.parent_ino, EXT4_IGET_NORMAL);
1524 if (IS_ERR_OR_NULL(dir)) {
1525 jbd_debug(1, "Dir %d not found.", darg.ino);
1526 goto out;
1527 }
1528 ret = ext4_init_new_dir(NULL, dir, inode);
1529 iput(dir);
1530 if (ret) {
1531 ret = 0;
1532 goto out;
1533 }
1534 }
1535 ret = ext4_fc_replay_link_internal(sb, &darg, inode);
1536 if (ret)
1537 goto out;
1538 set_nlink(inode, 1);
1539 ext4_mark_inode_dirty(NULL, inode);
1540out:
1541 if (inode)
1542 iput(inode);
1543 return ret;
1544}
1545
1546/*
1547 * Record physical disk regions which are in use as per fast commit area. Our
1548 * simple replay phase allocator excludes these regions from allocation.
1549 */
1550static int ext4_fc_record_regions(struct super_block *sb, int ino,
1551 ext4_lblk_t lblk, ext4_fsblk_t pblk, int len)
1552{
1553 struct ext4_fc_replay_state *state;
1554 struct ext4_fc_alloc_region *region;
1555
1556 state = &EXT4_SB(sb)->s_fc_replay_state;
1557 if (state->fc_regions_used == state->fc_regions_size) {
1558 state->fc_regions_size +=
1559 EXT4_FC_REPLAY_REALLOC_INCREMENT;
1560 state->fc_regions = krealloc(
1561 state->fc_regions,
1562 state->fc_regions_size *
1563 sizeof(struct ext4_fc_alloc_region),
1564 GFP_KERNEL);
1565 if (!state->fc_regions)
1566 return -ENOMEM;
1567 }
1568 region = &state->fc_regions[state->fc_regions_used++];
1569 region->ino = ino;
1570 region->lblk = lblk;
1571 region->pblk = pblk;
1572 region->len = len;
1573
1574 return 0;
1575}
1576
/*
 * Replay add range tag.
 *
 * Ensures the logical range logged in the TLV maps to the logged physical
 * blocks. Each sub-range falls into one of three cases:
 *  - not mapped: insert a fresh extent pointing at the logged pblk;
 *  - mapped to a different pblk: repoint it and free the old blocks
 *    (globally-correct bitmap state is restored afterwards by
 *    ext4_fc_set_bitmaps_and_counters());
 *  - mapped to the right pblk: only sync the written/unwritten state.
 * Errors are deliberately swallowed (always returns 0) so that replay of
 * the remaining tags continues.
 */
static int ext4_fc_replay_add_range(struct super_block *sb,
				struct ext4_fc_tl *tl)
{
	struct ext4_fc_add_range *fc_add_ex;
	struct ext4_extent newex, *ex;
	struct inode *inode;
	ext4_lblk_t start, cur;
	int remaining, len;
	ext4_fsblk_t start_pblk;
	struct ext4_map_blocks map;
	struct ext4_ext_path *path = NULL;
	int ret;

	fc_add_ex = (struct ext4_fc_add_range *)ext4_fc_tag_val(tl);
	ex = (struct ext4_extent *)&fc_add_ex->fc_ex;

	trace_ext4_fc_replay(sb, EXT4_FC_TAG_ADD_RANGE,
		le32_to_cpu(fc_add_ex->fc_ino), le32_to_cpu(ex->ee_block),
		ext4_ext_get_actual_len(ex));

	inode = ext4_iget(sb, le32_to_cpu(fc_add_ex->fc_ino),
				EXT4_IGET_NORMAL);
	if (IS_ERR_OR_NULL(inode)) {
		jbd_debug(1, "Inode not found.");
		return 0;
	}

	ret = ext4_fc_record_modified_inode(sb, inode->i_ino);

	start = le32_to_cpu(ex->ee_block);
	start_pblk = ext4_ext_pblock(ex);
	len = ext4_ext_get_actual_len(ex);

	cur = start;
	remaining = len;
	jbd_debug(1, "ADD_RANGE, lblk %d, pblk %lld, len %d, unwritten %d, inode %ld\n",
		  start, start_pblk, len, ext4_ext_is_unwritten(ex),
		  inode->i_ino);

	while (remaining > 0) {
		map.m_lblk = cur;
		map.m_len = remaining;
		map.m_pblk = 0;
		ret = ext4_map_blocks(NULL, inode, &map, 0);

		if (ret < 0) {
			iput(inode);
			return 0;
		}

		if (ret == 0) {
			/* Range is not mapped */
			path = ext4_find_extent(inode, cur, NULL, 0);
			if (IS_ERR(path)) {
				iput(inode);
				return 0;
			}
			memset(&newex, 0, sizeof(newex));
			newex.ee_block = cpu_to_le32(cur);
			ext4_ext_store_pblock(
				&newex, start_pblk + cur - start);
			newex.ee_len = cpu_to_le16(map.m_len);
			if (ext4_ext_is_unwritten(ex))
				ext4_ext_mark_unwritten(&newex);
			down_write(&EXT4_I(inode)->i_data_sem);
			ret = ext4_ext_insert_extent(
				NULL, inode, &path, &newex, 0);
			up_write((&EXT4_I(inode)->i_data_sem));
			ext4_ext_drop_refs(path);
			kfree(path);
			if (ret) {
				iput(inode);
				return 0;
			}
			goto next;
		}

		if (start_pblk + cur - start != map.m_pblk) {
			/*
			 * Logical to physical mapping changed. This can happen
			 * if this range was removed and then reallocated to
			 * map to new physical blocks during a fast commit.
			 */
			ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
					ext4_ext_is_unwritten(ex),
					start_pblk + cur - start);
			if (ret) {
				iput(inode);
				return 0;
			}
			/*
			 * Mark the old blocks as free since they aren't used
			 * anymore. We maintain an array of all the modified
			 * inodes. In case these blocks are still used at either
			 * a different logical range in the same inode or in
			 * some different inode, we will mark them as allocated
			 * at the end of the FC replay using our array of
			 * modified inodes.
			 */
			ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
			goto next;
		}

		/* Range is mapped and needs a state change */
		jbd_debug(1, "Converting from %d to %d %lld",
				map.m_flags & EXT4_MAP_UNWRITTEN,
			ext4_ext_is_unwritten(ex), map.m_pblk);
		ret = ext4_ext_replay_update_ex(inode, cur, map.m_len,
					ext4_ext_is_unwritten(ex), map.m_pblk);
		if (ret) {
			iput(inode);
			return 0;
		}
		/*
		 * We may have split the extent tree while toggling the state.
		 * Try to shrink the extent tree now.
		 */
		ext4_ext_replay_shrink_inode(inode, start + len);
next:
		cur += map.m_len;
		remaining -= map.m_len;
	}
	/* Shrink once more against i_size after the whole range is done. */
	ext4_ext_replay_shrink_inode(inode, i_size_read(inode) >>
				sb->s_blocksize_bits);
	iput(inode);
	return 0;
}
1705
/*
 * Replay DEL_RANGE tag.
 *
 * Frees every block currently mapped inside the logged logical range
 * (bitmap-only via ext4_mb_mark_bb(); shared blocks are re-marked used
 * later by ext4_fc_set_bitmaps_and_counters()), then punches the hole in
 * the extent tree and shrinks it to i_size. Errors from the punch are
 * only logged; the function always returns 0 so replay continues.
 */
static int
ext4_fc_replay_del_range(struct super_block *sb, struct ext4_fc_tl *tl)
{
	struct inode *inode;
	struct ext4_fc_del_range *lrange;
	struct ext4_map_blocks map;
	ext4_lblk_t cur, remaining;
	int ret;

	lrange = (struct ext4_fc_del_range *)ext4_fc_tag_val(tl);
	cur = le32_to_cpu(lrange->fc_lblk);
	remaining = le32_to_cpu(lrange->fc_len);

	trace_ext4_fc_replay(sb, EXT4_FC_TAG_DEL_RANGE,
		le32_to_cpu(lrange->fc_ino), cur, remaining);

	inode = ext4_iget(sb, le32_to_cpu(lrange->fc_ino), EXT4_IGET_NORMAL);
	if (IS_ERR_OR_NULL(inode)) {
		jbd_debug(1, "Inode %d not found", le32_to_cpu(lrange->fc_ino));
		return 0;
	}

	ret = ext4_fc_record_modified_inode(sb, inode->i_ino);

	jbd_debug(1, "DEL_RANGE, inode %ld, lblk %d, len %d\n",
			inode->i_ino, le32_to_cpu(lrange->fc_lblk),
			le32_to_cpu(lrange->fc_len));
	while (remaining > 0) {
		map.m_lblk = cur;
		map.m_len = remaining;

		ret = ext4_map_blocks(NULL, inode, &map, 0);
		if (ret < 0) {
			iput(inode);
			return 0;
		}
		if (ret > 0) {
			/* Mapped: free the blocks in the bitmap. */
			remaining -= ret;
			cur += ret;
			ext4_mb_mark_bb(inode->i_sb, map.m_pblk, map.m_len, 0);
		} else {
			/* Hole: skip past it. */
			remaining -= map.m_len;
			cur += map.m_len;
		}
	}

	ret = ext4_punch_hole(inode,
		le32_to_cpu(lrange->fc_lblk) << sb->s_blocksize_bits,
		le32_to_cpu(lrange->fc_len) <<  sb->s_blocksize_bits);
	if (ret)
		jbd_debug(1, "ext4_punch_hole returned %d", ret);
	ext4_ext_replay_shrink_inode(inode,
		i_size_read(inode) >> sb->s_blocksize_bits);
	ext4_mark_inode_dirty(NULL, inode);
	iput(inode);

	return 0;
}
1765
1766static inline const char *tag2str(u16 tag)
1767{
1768 switch (tag) {
1769 case EXT4_FC_TAG_LINK:
1770 return "TAG_ADD_ENTRY";
1771 case EXT4_FC_TAG_UNLINK:
1772 return "TAG_DEL_ENTRY";
1773 case EXT4_FC_TAG_ADD_RANGE:
1774 return "TAG_ADD_RANGE";
1775 case EXT4_FC_TAG_CREAT:
1776 return "TAG_CREAT_DENTRY";
1777 case EXT4_FC_TAG_DEL_RANGE:
1778 return "TAG_DEL_RANGE";
1779 case EXT4_FC_TAG_INODE:
1780 return "TAG_INODE";
1781 case EXT4_FC_TAG_PAD:
1782 return "TAG_PAD";
1783 case EXT4_FC_TAG_TAIL:
1784 return "TAG_TAIL";
1785 case EXT4_FC_TAG_HEAD:
1786 return "TAG_HEAD";
1787 default:
1788 return "TAG_ERROR";
1789 }
1790}
1791
/*
 * Final bitmap fixup after all replay tags have been applied.
 *
 * Walks every inode recorded by ext4_fc_record_modified_inode() and
 * marks both its mapped data blocks and its extent tree index blocks
 * as in-use in the block bitmaps (updating group counters). This undoes
 * any over-eager freeing done while individual tags were replayed.
 */
static void ext4_fc_set_bitmaps_and_counters(struct super_block *sb)
{
	struct ext4_fc_replay_state *state;
	struct inode *inode;
	struct ext4_ext_path *path = NULL;
	struct ext4_map_blocks map;
	int i, ret, j;
	ext4_lblk_t cur, end;

	state = &EXT4_SB(sb)->s_fc_replay_state;
	for (i = 0; i < state->fc_modified_inodes_used; i++) {
		inode = ext4_iget(sb, state->fc_modified_inodes[i],
			EXT4_IGET_NORMAL);
		if (IS_ERR_OR_NULL(inode)) {
			jbd_debug(1, "Inode %d not found.",
				state->fc_modified_inodes[i]);
			continue;
		}
		/* Scan the whole logical range of the inode. */
		cur = 0;
		end = EXT_MAX_BLOCKS;
		while (cur < end) {
			map.m_lblk = cur;
			map.m_len = end - cur;

			ret = ext4_map_blocks(NULL, inode, &map, 0);
			if (ret < 0)
				break;

			if (ret > 0) {
				/* Mark the extent tree path blocks used too */
				path = ext4_find_extent(inode, map.m_lblk, NULL, 0);
				if (!IS_ERR_OR_NULL(path)) {
					for (j = 0; j < path->p_depth; j++)
						ext4_mb_mark_bb(inode->i_sb,
							path[j].p_block, 1, 1);
					ext4_ext_drop_refs(path);
					kfree(path);
				}
				cur += ret;
				ext4_mb_mark_bb(inode->i_sb, map.m_pblk,
					map.m_len, 1);
			} else {
				/* Hole: advance at least one block. */
				cur = cur + (map.m_len ? map.m_len : 1);
			}
		}
		iput(inode);
	}
}
1839
1840/*
1841 * Check if block is in excluded regions for block allocation. The simple
1842 * allocator that runs during replay phase is calls this function to see
1843 * if it is okay to use a block.
1844 */
1845bool ext4_fc_replay_check_excluded(struct super_block *sb, ext4_fsblk_t blk)
1846{
1847 int i;
1848 struct ext4_fc_replay_state *state;
1849
1850 state = &EXT4_SB(sb)->s_fc_replay_state;
1851 for (i = 0; i < state->fc_regions_valid; i++) {
1852 if (state->fc_regions[i].ino == 0 ||
1853 state->fc_regions[i].len == 0)
1854 continue;
1855 if (blk >= state->fc_regions[i].pblk &&
1856 blk < state->fc_regions[i].pblk + state->fc_regions[i].len)
1857 return true;
1858 }
1859 return false;
1860}
1861
1862/* Cleanup function called after replay */
1863void ext4_fc_replay_cleanup(struct super_block *sb)
1864{
1865 struct ext4_sb_info *sbi = EXT4_SB(sb);
1866
1867 sbi->s_mount_state &= ~EXT4_FC_REPLAY;
1868 kfree(sbi->s_fc_replay_state.fc_regions);
1869 kfree(sbi->s_fc_replay_state.fc_modified_inodes);
1870}
1871
1872/*
1873 * Recovery Scan phase handler
1874 *
1875 * This function is called during the scan phase and is responsible
1876 * for doing following things:
1877 * - Make sure the fast commit area has valid tags for replay
1878 * - Count number of tags that need to be replayed by the replay handler
1879 * - Verify CRC
1880 * - Create a list of excluded blocks for allocation during replay phase
1881 *
1882 * This function returns JBD2_FC_REPLAY_CONTINUE to indicate that SCAN is
1883 * incomplete and JBD2 should send more blocks. It returns JBD2_FC_REPLAY_STOP
1884 * to indicate that scan has finished and JBD2 can now start replay phase.
1885 * It returns a negative error to indicate that there was an error. At the end
1886 * of a successful scan phase, sbi->s_fc_replay_state.fc_replay_num_tags is set
1887 * to indicate the number of tags that need to replayed during the replay phase.
1888 */
static int ext4_fc_replay_scan(journal_t *journal,
				struct buffer_head *bh, int off,
				tid_t expected_tid)
{
	struct super_block *sb = journal->j_private;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_fc_replay_state *state;
	int ret = JBD2_FC_REPLAY_CONTINUE;
	struct ext4_fc_add_range *ext;
	struct ext4_fc_tl *tl;
	struct ext4_fc_tail *tail;
	__u8 *start, *end;
	struct ext4_fc_head *head;
	struct ext4_extent *ex;

	state = &sbi->s_fc_replay_state;

	start = (u8 *)bh->b_data;
	end = (__u8 *)bh->b_data + journal->j_blocksize - 1;

	/* First block of the fast commit area: reset the scan state. */
	if (state->fc_replay_expected_off == 0) {
		state->fc_cur_tag = 0;
		state->fc_replay_num_tags = 0;
		state->fc_crc = 0;
		state->fc_regions = NULL;
		state->fc_regions_valid = state->fc_regions_used =
			state->fc_regions_size = 0;
		/* Check if we can stop early */
		if (le16_to_cpu(((struct ext4_fc_tl *)start)->fc_tag)
			!= EXT4_FC_TAG_HEAD)
			return 0;
	}

	/* Blocks must arrive strictly in order. */
	if (off != state->fc_replay_expected_off) {
		ret = -EFSCORRUPTED;
		goto out_err;
	}

	state->fc_replay_expected_off++;
	fc_for_each_tl(start, end, tl) {
		jbd_debug(3, "Scan phase, tag:%s, blk %lld\n",
			  tag2str(le16_to_cpu(tl->fc_tag)), bh->b_blocknr);
		switch (le16_to_cpu(tl->fc_tag)) {
		case EXT4_FC_TAG_ADD_RANGE:
			/*
			 * Remember the logged physical range so the replay
			 * allocator never hands those blocks out.
			 */
			ext = (struct ext4_fc_add_range *)ext4_fc_tag_val(tl);
			ex = (struct ext4_extent *)&ext->fc_ex;
			ret = ext4_fc_record_regions(sb,
				le32_to_cpu(ext->fc_ino),
				le32_to_cpu(ex->ee_block), ext4_ext_pblock(ex),
				ext4_ext_get_actual_len(ex));
			if (ret < 0)
				break;
			ret = JBD2_FC_REPLAY_CONTINUE;
			fallthrough;
		case EXT4_FC_TAG_DEL_RANGE:
		case EXT4_FC_TAG_LINK:
		case EXT4_FC_TAG_UNLINK:
		case EXT4_FC_TAG_CREAT:
		case EXT4_FC_TAG_INODE:
		case EXT4_FC_TAG_PAD:
			/* Ordinary tags: count them and fold into the CRC. */
			state->fc_cur_tag++;
			state->fc_crc = ext4_chksum(sbi, state->fc_crc, tl,
					sizeof(*tl) + ext4_fc_tag_len(tl));
			break;
		case EXT4_FC_TAG_TAIL:
			/*
			 * Tail carries the expected CRC (excluded from the
			 * checksum itself). A match commits everything seen
			 * so far: num_tags and the valid region count become
			 * visible to the replay phase.
			 */
			state->fc_cur_tag++;
			tail = (struct ext4_fc_tail *)ext4_fc_tag_val(tl);
			state->fc_crc = ext4_chksum(sbi, state->fc_crc, tl,
						sizeof(*tl) +
						offsetof(struct ext4_fc_tail,
						fc_crc));
			if (le32_to_cpu(tail->fc_tid) == expected_tid &&
				le32_to_cpu(tail->fc_crc) == state->fc_crc) {
				state->fc_replay_num_tags = state->fc_cur_tag;
				state->fc_regions_valid =
					state->fc_regions_used;
			} else {
				ret = state->fc_replay_num_tags ?
					JBD2_FC_REPLAY_STOP : -EFSBADCRC;
			}
			state->fc_crc = 0;
			break;
		case EXT4_FC_TAG_HEAD:
			head = (struct ext4_fc_head *)ext4_fc_tag_val(tl);
			if (le32_to_cpu(head->fc_features) &
				~EXT4_FC_SUPPORTED_FEATURES) {
				ret = -EOPNOTSUPP;
				break;
			}
			/* A head for a different TID ends this scan. */
			if (le32_to_cpu(head->fc_tid) != expected_tid) {
				ret = JBD2_FC_REPLAY_STOP;
				break;
			}
			state->fc_cur_tag++;
			state->fc_crc = ext4_chksum(sbi, state->fc_crc, tl,
					sizeof(*tl) + ext4_fc_tag_len(tl));
			break;
		default:
			/* Unknown tag: stop (or fail if nothing valid yet). */
			ret = state->fc_replay_num_tags ?
				JBD2_FC_REPLAY_STOP : -ECANCELED;
		}
		if (ret < 0 || ret == JBD2_FC_REPLAY_STOP)
			break;
	}

out_err:
	trace_ext4_fc_replay_scan(sb, ret, off);
	return ret;
}
1998
Harshad Shirwadkar5b849b52020-10-15 13:37:58 -07001999/*
2000 * Main recovery path entry point.
 * The meaning of return codes is the same as above.
Harshad Shirwadkar5b849b52020-10-15 13:37:58 -07002002 */
2003static int ext4_fc_replay(journal_t *journal, struct buffer_head *bh,
2004 enum passtype pass, int off, tid_t expected_tid)
2005{
Harshad Shirwadkar8016e292020-10-15 13:37:59 -07002006 struct super_block *sb = journal->j_private;
2007 struct ext4_sb_info *sbi = EXT4_SB(sb);
2008 struct ext4_fc_tl *tl;
2009 __u8 *start, *end;
2010 int ret = JBD2_FC_REPLAY_CONTINUE;
2011 struct ext4_fc_replay_state *state = &sbi->s_fc_replay_state;
2012 struct ext4_fc_tail *tail;
2013
2014 if (pass == PASS_SCAN) {
2015 state->fc_current_pass = PASS_SCAN;
2016 return ext4_fc_replay_scan(journal, bh, off, expected_tid);
2017 }
2018
2019 if (state->fc_current_pass != pass) {
2020 state->fc_current_pass = pass;
2021 sbi->s_mount_state |= EXT4_FC_REPLAY;
2022 }
2023 if (!sbi->s_fc_replay_state.fc_replay_num_tags) {
2024 jbd_debug(1, "Replay stops\n");
2025 ext4_fc_set_bitmaps_and_counters(sb);
2026 return 0;
2027 }
2028
2029#ifdef CONFIG_EXT4_DEBUG
2030 if (sbi->s_fc_debug_max_replay && off >= sbi->s_fc_debug_max_replay) {
2031 pr_warn("Dropping fc block %d because max_replay set\n", off);
2032 return JBD2_FC_REPLAY_STOP;
2033 }
2034#endif
2035
2036 start = (u8 *)bh->b_data;
2037 end = (__u8 *)bh->b_data + journal->j_blocksize - 1;
2038
2039 fc_for_each_tl(start, end, tl) {
2040 if (state->fc_replay_num_tags == 0) {
2041 ret = JBD2_FC_REPLAY_STOP;
2042 ext4_fc_set_bitmaps_and_counters(sb);
2043 break;
2044 }
2045 jbd_debug(3, "Replay phase, tag:%s\n",
2046 tag2str(le16_to_cpu(tl->fc_tag)));
2047 state->fc_replay_num_tags--;
2048 switch (le16_to_cpu(tl->fc_tag)) {
2049 case EXT4_FC_TAG_LINK:
2050 ret = ext4_fc_replay_link(sb, tl);
2051 break;
2052 case EXT4_FC_TAG_UNLINK:
2053 ret = ext4_fc_replay_unlink(sb, tl);
2054 break;
2055 case EXT4_FC_TAG_ADD_RANGE:
2056 ret = ext4_fc_replay_add_range(sb, tl);
2057 break;
2058 case EXT4_FC_TAG_CREAT:
2059 ret = ext4_fc_replay_create(sb, tl);
2060 break;
2061 case EXT4_FC_TAG_DEL_RANGE:
2062 ret = ext4_fc_replay_del_range(sb, tl);
2063 break;
2064 case EXT4_FC_TAG_INODE:
2065 ret = ext4_fc_replay_inode(sb, tl);
2066 break;
2067 case EXT4_FC_TAG_PAD:
2068 trace_ext4_fc_replay(sb, EXT4_FC_TAG_PAD, 0,
2069 ext4_fc_tag_len(tl), 0);
2070 break;
2071 case EXT4_FC_TAG_TAIL:
2072 trace_ext4_fc_replay(sb, EXT4_FC_TAG_TAIL, 0,
2073 ext4_fc_tag_len(tl), 0);
2074 tail = (struct ext4_fc_tail *)ext4_fc_tag_val(tl);
2075 WARN_ON(le32_to_cpu(tail->fc_tid) != expected_tid);
2076 break;
2077 case EXT4_FC_TAG_HEAD:
2078 break;
2079 default:
2080 trace_ext4_fc_replay(sb, le16_to_cpu(tl->fc_tag), 0,
2081 ext4_fc_tag_len(tl), 0);
2082 ret = -ECANCELED;
2083 break;
2084 }
2085 if (ret < 0)
2086 break;
2087 ret = JBD2_FC_REPLAY_CONTINUE;
2088 }
2089 return ret;
Harshad Shirwadkar5b849b52020-10-15 13:37:58 -07002090}
2091
Harshad Shirwadkar6866d7b2020-10-15 13:37:55 -07002092void ext4_fc_init(struct super_block *sb, journal_t *journal)
2093{
Harshad Shirwadkar5b849b52020-10-15 13:37:58 -07002094 /*
2095 * We set replay callback even if fast commit disabled because we may
2096 * could still have fast commit blocks that need to be replayed even if
2097 * fast commit has now been turned off.
2098 */
2099 journal->j_fc_replay_callback = ext4_fc_replay;
Harshad Shirwadkar6866d7b2020-10-15 13:37:55 -07002100 if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
2101 return;
Harshad Shirwadkarff780b92020-10-15 13:37:56 -07002102 journal->j_fc_cleanup_callback = ext4_fc_cleanup;
Harshad Shirwadkar6866d7b2020-10-15 13:37:55 -07002103}
Harshad Shirwadkaraa75f4d2020-10-15 13:37:57 -07002104
/*
 * Human-readable labels printed by ext4_fc_info_show(), indexed 0 ..
 * EXT4_FC_REASON_MAX - 1 by the ineligibility reason code — presumably
 * matching the EXT4_FC_REASON_* enum order in ext4.h; verify ordering
 * there before adding or rearranging entries.
 */
const char *fc_ineligible_reasons[] = {
	"Extended attributes changed",
	"Cross rename",
	"Journal flag changed",
	"Insufficient memory",
	"Swap boot",
	"Resize",
	"Dir renamed",
	"Falloc range op",
	"FC Commit Failed"
};
2116
2117int ext4_fc_info_show(struct seq_file *seq, void *v)
2118{
2119 struct ext4_sb_info *sbi = EXT4_SB((struct super_block *)seq->private);
2120 struct ext4_fc_stats *stats = &sbi->s_fc_stats;
2121 int i;
2122
2123 if (v != SEQ_START_TOKEN)
2124 return 0;
2125
2126 seq_printf(seq,
2127 "fc stats:\n%ld commits\n%ld ineligible\n%ld numblks\n%lluus avg_commit_time\n",
2128 stats->fc_num_commits, stats->fc_ineligible_commits,
2129 stats->fc_numblks,
2130 div_u64(sbi->s_fc_avg_commit_time, 1000));
2131 seq_puts(seq, "Ineligible reasons:\n");
2132 for (i = 0; i < EXT4_FC_REASON_MAX; i++)
2133 seq_printf(seq, "\"%s\":\t%d\n", fc_ineligible_reasons[i],
2134 stats->fc_ineligible_reason_count[i]);
2135
2136 return 0;
2137}
2138
Harshad Shirwadkaraa75f4d2020-10-15 13:37:57 -07002139int __init ext4_fc_init_dentry_cache(void)
2140{
2141 ext4_fc_dentry_cachep = KMEM_CACHE(ext4_fc_dentry_update,
2142 SLAB_RECLAIM_ACCOUNT);
2143
2144 if (ext4_fc_dentry_cachep == NULL)
2145 return -ENOMEM;
2146
2147 return 0;
2148}