Blame - fs/ext4/fast_commit.c - linux-5.10

blob: 888d9d217d5bdfe99056da00bb0366be34a32339 [file] [log] [blame]

Harshad Shirwadkar	6866d7b	2020-10-15 13:37:55 -0700	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
				2
				3	/*
				4	* fs/ext4/fast_commit.c
				5	*
				6	* Written by Harshad Shirwadkar <harshadshirwadkar@gmail.com>
				7	*
				8	* Ext4 fast commits routines.
				9	*/
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	10	#include "ext4.h"
Harshad Shirwadkar	6866d7b	2020-10-15 13:37:55 -0700	[diff] [blame]	11	#include "ext4_jbd2.h"
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	12	#include "ext4_extents.h"
				13	#include "mballoc.h"
				14
				15	/*
				16	* Ext4 Fast Commits
				17	* -----------------
				18	*
				19	* Ext4 fast commits implement fine grained journalling for Ext4.
				20	*
				21	* Fast commits are organized as a log of tag-length-value (TLV) structs. (See
				22	* struct ext4_fc_tl). Each TLV contains some delta that is replayed TLV by
				23	* TLV during the recovery phase. For the scenarios for which we currently
				24	* don't have replay code, fast commit falls back to full commits.
				25	* Fast commits record delta in one of the following three categories.
				26	*
				27	* (A) Directory entry updates:
				28	*
				29	* - EXT4_FC_TAG_UNLINK - records directory entry unlink
				30	* - EXT4_FC_TAG_LINK - records directory entry link
				31	* - EXT4_FC_TAG_CREAT - records inode and directory entry creation
				32	*
				33	* (B) File specific data range updates:
				34	*
				35	* - EXT4_FC_TAG_ADD_RANGE - records addition of new blocks to an inode
				36	* - EXT4_FC_TAG_DEL_RANGE - records deletion of blocks from an inode
				37	*
				38	* (C) Inode metadata (mtime / ctime etc):
				39	*
				40	* - EXT4_FC_TAG_INODE - record the inode that should be replayed
				41	* during recovery. Note that iblocks field is
				42	* not replayed and instead derived during
				43	* replay.
				44	* Commit Operation
				45	* ----------------
				46	* With fast commits, we maintain all the directory entry operations in the
				47	* order in which they are issued in an in-memory queue. This queue is flushed
				48	* to disk during the commit operation. We also maintain a list of inodes
				49	* that need to be committed during a fast commit in another in memory queue of
				50	* inodes. During the commit operation, we commit in the following order:
				51	*
				52	* [1] Lock inodes for any further data updates by setting COMMITTING state
				53	* [2] Submit data buffers of all the inodes
				54	* [3] Wait for [2] to complete
				55	* [4] Commit all the directory entry updates in the fast commit space
				56	* [5] Commit all the changed inode structures
				57	* [6] Write tail tag (this tag ensures the atomicity, please read the following
				58	* section for more details).
				59	* [7] Wait for [4], [5] and [6] to complete.
				60	*
				61	* All the inode updates must call ext4_fc_start_update() before starting an
				62	* update. If such an ongoing update is present, fast commit waits for it to
				63	* complete. The completion of such an update is marked by
				64	* ext4_fc_stop_update().
				65	*
				66	* Fast Commit Ineligibility
				67	* -------------------------
				68	* Not all operations are supported by fast commits today (e.g. extended
				69	* attributes). Fast commit ineligibility is marked by calling one of the
				70	* two following functions:
				71	*
				72	* - ext4_fc_mark_ineligible(): This makes next fast commit operation to fall
				73	* back to full commit. This is useful in case of transient errors.
				74	*
				75	* - ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() - This makes all
				76	* the fast commits happening between ext4_fc_start_ineligible() and
				77	* ext4_fc_stop_ineligible() and one fast commit after the call to
				78	* ext4_fc_stop_ineligible() to fall back to full commits. It is important to
				79	* make one more fast commit to fall back to full commit after stop call so
				80	* that it is guaranteed that the fast commit ineligible operation contained
				81	* within ext4_fc_start_ineligible() and ext4_fc_stop_ineligible() is
				82	* followed by at least 1 full commit.
				83	*
				84	* Atomicity of commits
				85	* --------------------
				86	* In order to guarantee atomicity during the commit operation, fast commit
				87	* uses "EXT4_FC_TAG_TAIL" tag that marks a fast commit as complete. Tail
				88	* tag contains CRC of the contents and TID of the transaction after which
				89	* this fast commit should be applied. Recovery code replays fast commit
				90	* logs only if there's at least 1 valid tail present. For every fast commit
				91	* operation, there is 1 tail. This means, we may end up with multiple tails
				92	* in the fast commit space. Here's an example:
				93	*
				94	* - Create a new file A and remove existing file B
				95	* - fsync()
				96	* - Append contents to file A
				97	* - Truncate file A
				98	* - fsync()
				99	*
				100	* The fast commit space at the end of above operations would look like this:
				101	* [HEAD] [CREAT A] [UNLINK B] [TAIL] [ADD_RANGE A] [DEL_RANGE A] [TAIL]
				102	* |<--- Fast Commit 1 --->|<--- Fast Commit 2 ---->|
				103	*
				104	* Replay code should thus check for all the valid tails in the FC area.
				105	*
				106	* TODOs
				107	* -----
				108	* 1) Make fast commit atomic updates more fine grained. Today, a fast commit
				109	* eligible update must be protected within ext4_fc_start_update() and
				110	* ext4_fc_stop_update(). These routines are called at much higher
				111	* routines. This can be made more fine grained by combining with
				112	* ext4_journal_start().
				113	*
				114	* 2) Same above for ext4_fc_start_ineligible() and ext4_fc_stop_ineligible()
				115	*
				116	* 3) Handle more ineligible cases.
				117	*/
				118
				119	#include <trace/events/ext4.h>
				120	static struct kmem_cache *ext4_fc_dentry_cachep;
				121
				122	static void ext4_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
				123	{
				124	BUFFER_TRACE(bh, "");
				125	if (uptodate) {
				126	ext4_debug("%s: Block %lld up-to-date",
				127	__func__, bh->b_blocknr);
				128	set_buffer_uptodate(bh);
				129	} else {
				130	ext4_debug("%s: Block %lld not up-to-date",
				131	__func__, bh->b_blocknr);
				132	clear_buffer_uptodate(bh);
				133	}
				134
				135	unlock_buffer(bh);
				136	}
				137
				138	static inline void ext4_fc_reset_inode(struct inode *inode)
				139	{
				140	struct ext4_inode_info *ei = EXT4_I(inode);
				141
				142	ei->i_fc_lblk_start = 0;
				143	ei->i_fc_lblk_len = 0;
				144	}
				145
				146	void ext4_fc_init_inode(struct inode *inode)
				147	{
				148	struct ext4_inode_info *ei = EXT4_I(inode);
				149
				150	ext4_fc_reset_inode(inode);
				151	ext4_clear_inode_state(inode, EXT4_STATE_FC_COMMITTING);
				152	INIT_LIST_HEAD(&ei->i_fc_list);
				153	init_waitqueue_head(&ei->i_fc_wait);
				154	atomic_set(&ei->i_fc_updates, 0);
				155	ei->i_fc_committed_subtid = 0;
				156	}
				157
				158	/*
				159	* Inform Ext4's fast about start of an inode update
				160	*
				161	* This function is called by the high level call VFS callbacks before
				162	* performing any inode update. This function blocks if there's an ongoing
				163	* fast commit on the inode in question.
				164	*/
				165	void ext4_fc_start_update(struct inode *inode)
				166	{
				167	struct ext4_inode_info *ei = EXT4_I(inode);
				168
				169	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT))
				170	return;
				171
				172	restart:
				173	spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
				174	if (list_empty(&ei->i_fc_list))
				175	goto out;
				176
				177	if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
				178	wait_queue_head_t *wq;
				179	#if (BITS_PER_LONG < 64)
				180	DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
				181	EXT4_STATE_FC_COMMITTING);
				182	wq = bit_waitqueue(&ei->i_state_flags,
				183	EXT4_STATE_FC_COMMITTING);
				184	#else
				185	DEFINE_WAIT_BIT(wait, &ei->i_flags,
				186	EXT4_STATE_FC_COMMITTING);
				187	wq = bit_waitqueue(&ei->i_flags,
				188	EXT4_STATE_FC_COMMITTING);
				189	#endif
				190	prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
				191	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
				192	schedule();
				193	finish_wait(wq, &wait.wq_entry);
				194	goto restart;
				195	}
				196	out:
				197	atomic_inc(&ei->i_fc_updates);
				198	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
				199	}
				200
				201	/*
				202	* Stop inode update and wake up waiting fast commits if any.
				203	*/
				204	void ext4_fc_stop_update(struct inode *inode)
				205	{
				206	struct ext4_inode_info *ei = EXT4_I(inode);
				207
				208	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT))
				209	return;
				210
				211	if (atomic_dec_and_test(&ei->i_fc_updates))
				212	wake_up_all(&ei->i_fc_wait);
				213	}
				214
				215	/*
				216	* Remove inode from fast commit list. If the inode is being committed
				217	* we wait until inode commit is done.
				218	*/
				219	void ext4_fc_del(struct inode *inode)
				220	{
				221	struct ext4_inode_info *ei = EXT4_I(inode);
				222
				223	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT))
				224	return;
				225
				226
				227	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT))
				228	return;
				229
				230	restart:
				231	spin_lock(&EXT4_SB(inode->i_sb)->s_fc_lock);
				232	if (list_empty(&ei->i_fc_list)) {
				233	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
				234	return;
				235	}
				236
				237	if (ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING)) {
				238	wait_queue_head_t *wq;
				239	#if (BITS_PER_LONG < 64)
				240	DEFINE_WAIT_BIT(wait, &ei->i_state_flags,
				241	EXT4_STATE_FC_COMMITTING);
				242	wq = bit_waitqueue(&ei->i_state_flags,
				243	EXT4_STATE_FC_COMMITTING);
				244	#else
				245	DEFINE_WAIT_BIT(wait, &ei->i_flags,
				246	EXT4_STATE_FC_COMMITTING);
				247	wq = bit_waitqueue(&ei->i_flags,
				248	EXT4_STATE_FC_COMMITTING);
				249	#endif
				250	prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
				251	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
				252	schedule();
				253	finish_wait(wq, &wait.wq_entry);
				254	goto restart;
				255	}
				256	if (!list_empty(&ei->i_fc_list))
				257	list_del_init(&ei->i_fc_list);
				258	spin_unlock(&EXT4_SB(inode->i_sb)->s_fc_lock);
				259	}
				260
				261	/*
				262	* Mark file system as fast commit ineligible. This means that next commit
				263	* operation would result in a full jbd2 commit.
				264	*/
				265	void ext4_fc_mark_ineligible(struct super_block *sb, int reason)
				266	{
				267	struct ext4_sb_info *sbi = EXT4_SB(sb);
				268
				269	sbi->s_mount_state \|= EXT4_FC_INELIGIBLE;
				270	WARN_ON(reason >= EXT4_FC_REASON_MAX);
				271	sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
				272	}
				273
				274	/*
				275	* Start a fast commit ineligible update. Any commits that happen while
				276	* such an operation is in progress fall back to full commits.
				277	*/
				278	void ext4_fc_start_ineligible(struct super_block *sb, int reason)
				279	{
				280	struct ext4_sb_info *sbi = EXT4_SB(sb);
				281
				282	WARN_ON(reason >= EXT4_FC_REASON_MAX);
				283	sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
				284	atomic_inc(&sbi->s_fc_ineligible_updates);
				285	}
				286
				287	/*
				288	* Stop a fast commit ineligible update. We set EXT4_FC_INELIGIBLE flag here
				289	* to ensure that after stopping the ineligible update, at least one full
				290	* commit takes place.
				291	*/
				292	void ext4_fc_stop_ineligible(struct super_block *sb)
				293	{
				294	EXT4_SB(sb)->s_mount_state \|= EXT4_FC_INELIGIBLE;
				295	atomic_dec(&EXT4_SB(sb)->s_fc_ineligible_updates);
				296	}
				297
				298	static inline int ext4_fc_is_ineligible(struct super_block *sb)
				299	{
				300	return (EXT4_SB(sb)->s_mount_state & EXT4_FC_INELIGIBLE) \|\|
				301	atomic_read(&EXT4_SB(sb)->s_fc_ineligible_updates);
				302	}
				303
				304	/*
				305	* Generic fast commit tracking function. If this is the first time this we are
				306	* called after a full commit, we initialize fast commit fields and then call
				307	* __fc_track_fn() with update = 0. If we have already been called after a full
				308	* commit, we pass update = 1. Based on that, the track function can determine
				309	* if it needs to track a field for the first time or if it needs to just
				310	* update the previously tracked value.
				311	*
				312	* If enqueue is set, this function enqueues the inode in fast commit list.
				313	*/
				314	static int ext4_fc_track_template(
				315	struct inode inode, int (__fc_track_fn)(struct inode , void , bool),
				316	void *args, int enqueue)
				317	{
				318	tid_t running_txn_tid;
				319	bool update = false;
				320	struct ext4_inode_info *ei = EXT4_I(inode);
				321	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
				322	int ret;
				323
				324	if (!test_opt2(inode->i_sb, JOURNAL_FAST_COMMIT))
				325	return -EOPNOTSUPP;
				326
				327	if (ext4_fc_is_ineligible(inode->i_sb))
				328	return -EINVAL;
				329
				330	running_txn_tid = sbi->s_journal ?
				331	sbi->s_journal->j_commit_sequence + 1 : 0;
				332
				333	mutex_lock(&ei->i_fc_lock);
				334	if (running_txn_tid == ei->i_sync_tid) {
				335	update = true;
				336	} else {
				337	ext4_fc_reset_inode(inode);
				338	ei->i_sync_tid = running_txn_tid;
				339	}
				340	ret = __fc_track_fn(inode, args, update);
				341	mutex_unlock(&ei->i_fc_lock);
				342
				343	if (!enqueue)
				344	return ret;
				345
				346	spin_lock(&sbi->s_fc_lock);
				347	if (list_empty(&EXT4_I(inode)->i_fc_list))
				348	list_add_tail(&EXT4_I(inode)->i_fc_list,
				349	(sbi->s_mount_state & EXT4_FC_COMMITTING) ?
				350	&sbi->s_fc_q[FC_Q_STAGING] :
				351	&sbi->s_fc_q[FC_Q_MAIN]);
				352	spin_unlock(&sbi->s_fc_lock);
				353
				354	return ret;
				355	}
				356
				/* Argument bundle handed to __track_dentry_update() through a void pointer. */
				struct __track_dentry_update_args {
					struct dentry *dentry;	/* directory entry being changed */
					int op;			/* EXT4_FC_TAG_* value describing the change */
				};
				361
				362	/* __track_fn for directory entry updates. Called with ei->i_fc_lock. */
				363	static int __track_dentry_update(struct inode inode, void arg, bool update)
				364	{
				365	struct ext4_fc_dentry_update *node;
				366	struct ext4_inode_info *ei = EXT4_I(inode);
				367	struct __track_dentry_update_args *dentry_update =
				368	(struct __track_dentry_update_args *)arg;
				369	struct dentry *dentry = dentry_update->dentry;
				370	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
				371
				372	mutex_unlock(&ei->i_fc_lock);
				373	node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
				374	if (!node) {
				375	ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_MEM);
				376	mutex_lock(&ei->i_fc_lock);
				377	return -ENOMEM;
				378	}
				379
				380	node->fcd_op = dentry_update->op;
				381	node->fcd_parent = dentry->d_parent->d_inode->i_ino;
				382	node->fcd_ino = inode->i_ino;
				383	if (dentry->d_name.len > DNAME_INLINE_LEN) {
				384	node->fcd_name.name = kmalloc(dentry->d_name.len, GFP_NOFS);
				385	if (!node->fcd_name.name) {
				386	kmem_cache_free(ext4_fc_dentry_cachep, node);
				387	ext4_fc_mark_ineligible(inode->i_sb,
				388	EXT4_FC_REASON_MEM);
				389	mutex_lock(&ei->i_fc_lock);
				390	return -ENOMEM;
				391	}
				392	memcpy((u8 *)node->fcd_name.name, dentry->d_name.name,
				393	dentry->d_name.len);
				394	} else {
				395	memcpy(node->fcd_iname, dentry->d_name.name,
				396	dentry->d_name.len);
				397	node->fcd_name.name = node->fcd_iname;
				398	}
				399	node->fcd_name.len = dentry->d_name.len;
				400
				401	spin_lock(&sbi->s_fc_lock);
				402	if (sbi->s_mount_state & EXT4_FC_COMMITTING)
				403	list_add_tail(&node->fcd_list,
				404	&sbi->s_fc_dentry_q[FC_Q_STAGING]);
				405	else
				406	list_add_tail(&node->fcd_list, &sbi->s_fc_dentry_q[FC_Q_MAIN]);
				407	spin_unlock(&sbi->s_fc_lock);
				408	mutex_lock(&ei->i_fc_lock);
				409
				410	return 0;
				411	}
				412
				413	void ext4_fc_track_unlink(struct inode inode, struct dentry dentry)
				414	{
				415	struct __track_dentry_update_args args;
				416	int ret;
				417
				418	args.dentry = dentry;
				419	args.op = EXT4_FC_TAG_UNLINK;
				420
				421	ret = ext4_fc_track_template(inode, __track_dentry_update,
				422	(void *)&args, 0);
				423	trace_ext4_fc_track_unlink(inode, dentry, ret);
				424	}
				425
				426	void ext4_fc_track_link(struct inode inode, struct dentry dentry)
				427	{
				428	struct __track_dentry_update_args args;
				429	int ret;
				430
				431	args.dentry = dentry;
				432	args.op = EXT4_FC_TAG_LINK;
				433
				434	ret = ext4_fc_track_template(inode, __track_dentry_update,
				435	(void *)&args, 0);
				436	trace_ext4_fc_track_link(inode, dentry, ret);
				437	}
				438
				439	void ext4_fc_track_create(struct inode inode, struct dentry dentry)
				440	{
				441	struct __track_dentry_update_args args;
				442	int ret;
				443
				444	args.dentry = dentry;
				445	args.op = EXT4_FC_TAG_CREAT;
				446
				447	ret = ext4_fc_track_template(inode, __track_dentry_update,
				448	(void *)&args, 0);
				449	trace_ext4_fc_track_create(inode, dentry, ret);
				450	}
				451
				452	/* __track_fn for inode tracking */
				453	static int __track_inode(struct inode inode, void arg, bool update)
				454	{
				455	if (update)
				456	return -EEXIST;
				457
				458	EXT4_I(inode)->i_fc_lblk_len = 0;
				459
				460	return 0;
				461	}
				462
				463	void ext4_fc_track_inode(struct inode *inode)
				464	{
				465	int ret;
				466
				467	if (S_ISDIR(inode->i_mode))
				468	return;
				469
				470	ret = ext4_fc_track_template(inode, __track_inode, NULL, 1);
				471	trace_ext4_fc_track_inode(inode, ret);
				472	}
				473
				474	struct __track_range_args {
				475	ext4_lblk_t start, end;
				476	};
				477
				478	/* __track_fn for tracking data updates */
				479	static int __track_range(struct inode inode, void arg, bool update)
				480	{
				481	struct ext4_inode_info *ei = EXT4_I(inode);
				482	ext4_lblk_t oldstart;
				483	struct __track_range_args *__arg =
				484	(struct __track_range_args *)arg;
				485
				486	if (inode->i_ino < EXT4_FIRST_INO(inode->i_sb)) {
				487	ext4_debug("Special inode %ld being modified\n", inode->i_ino);
				488	return -ECANCELED;
				489	}
				490
				491	oldstart = ei->i_fc_lblk_start;
				492
				493	if (update && ei->i_fc_lblk_len > 0) {
				494	ei->i_fc_lblk_start = min(ei->i_fc_lblk_start, __arg->start);
				495	ei->i_fc_lblk_len =
				496	max(oldstart + ei->i_fc_lblk_len - 1, __arg->end) -
				497	ei->i_fc_lblk_start + 1;
				498	} else {
				499	ei->i_fc_lblk_start = __arg->start;
				500	ei->i_fc_lblk_len = __arg->end - __arg->start + 1;
				501	}
				502
				503	return 0;
				504	}
				505
				506	void ext4_fc_track_range(struct inode *inode, ext4_lblk_t start,
				507	ext4_lblk_t end)
				508	{
				509	struct __track_range_args args;
				510	int ret;
				511
				512	if (S_ISDIR(inode->i_mode))
				513	return;
				514
				515	args.start = start;
				516	args.end = end;
				517
				518	ret = ext4_fc_track_template(inode, __track_range, &args, 1);
				519
				520	trace_ext4_fc_track_range(inode, start, end, ret);
				521	}
				522
				523	static void ext4_fc_submit_bh(struct super_block *sb)
				524	{
				525	int write_flags = REQ_SYNC;
				526	struct buffer_head *bh = EXT4_SB(sb)->s_fc_bh;
				527
				528	if (test_opt(sb, BARRIER))
				529	write_flags \|= REQ_FUA \| REQ_PREFLUSH;
				530	lock_buffer(bh);
				531	clear_buffer_dirty(bh);
				532	set_buffer_uptodate(bh);
				533	bh->b_end_io = ext4_end_buffer_io_sync;
				534	submit_bh(REQ_OP_WRITE, write_flags, bh);
				535	EXT4_SB(sb)->s_fc_bh = NULL;
				536	}
				537
				538	/* Ext4 commit path routines */
				539
				540	/* memzero and update CRC */
				541	static void ext4_fc_memzero(struct super_block sb, void *dst, int len,
				542	u32 *crc)
				543	{
				544	void *ret;
				545
				546	ret = memset(dst, 0, len);
				547	if (crc)
				548	crc = ext4_chksum(EXT4_SB(sb), crc, dst, len);
				549	return ret;
				550	}
				551
				552	/*
				553	* Allocate len bytes on a fast commit buffer.
				554	*
				555	* During the commit time this function is used to manage fast commit
				556	* block space. We don't split a fast commit log onto different
				557	* blocks. So this function makes sure that if there's not enough space
				558	* on the current block, the remaining space in the current block is
				559	* marked as unused by adding EXT4_FC_TAG_PAD tag. In that case,
				560	* new block is from jbd2 and CRC is updated to reflect the padding
				561	* we added.
				562	*/
				563	static u8 ext4_fc_reserve_space(struct super_block sb, int len, u32 *crc)
				564	{
				565	struct ext4_fc_tl *tl;
				566	struct ext4_sb_info *sbi = EXT4_SB(sb);
				567	struct buffer_head *bh;
				568	int bsize = sbi->s_journal->j_blocksize;
				569	int ret, off = sbi->s_fc_bytes % bsize;
				570	int pad_len;
				571
				572	/*
				573	* After allocating len, we should have space at least for a 0 byte
				574	* padding.
				575	*/
				576	if (len + sizeof(struct ext4_fc_tl) > bsize)
				577	return NULL;
				578
				579	if (bsize - off - 1 > len + sizeof(struct ext4_fc_tl)) {
				580	/*
				581	* Only allocate from current buffer if we have enough space for
				582	* this request AND we have space to add a zero byte padding.
				583	*/
				584	if (!sbi->s_fc_bh) {
				585	ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
				586	if (ret)
				587	return NULL;
				588	sbi->s_fc_bh = bh;
				589	}
				590	sbi->s_fc_bytes += len;
				591	return sbi->s_fc_bh->b_data + off;
				592	}
				593	/* Need to add PAD tag */
				594	tl = (struct ext4_fc_tl *)(sbi->s_fc_bh->b_data + off);
				595	tl->fc_tag = cpu_to_le16(EXT4_FC_TAG_PAD);
				596	pad_len = bsize - off - 1 - sizeof(struct ext4_fc_tl);
				597	tl->fc_len = cpu_to_le16(pad_len);
				598	if (crc)
				599	crc = ext4_chksum(sbi, crc, tl, sizeof(*tl));
				600	if (pad_len > 0)
				601	ext4_fc_memzero(sb, tl + 1, pad_len, crc);
				602	ext4_fc_submit_bh(sb);
				603
				604	ret = jbd2_fc_get_buf(EXT4_SB(sb)->s_journal, &bh);
				605	if (ret)
				606	return NULL;
				607	sbi->s_fc_bh = bh;
				608	sbi->s_fc_bytes = (sbi->s_fc_bytes / bsize + 1) * bsize + len;
				609	return sbi->s_fc_bh->b_data;
				610	}
				611
				612	/* memcpy to fc reserved space and update CRC */
				613	static void ext4_fc_memcpy(struct super_block sb, void dst, const void src,
				614	int len, u32 *crc)
				615	{
				616	if (crc)
				617	crc = ext4_chksum(EXT4_SB(sb), crc, src, len);
				618	return memcpy(dst, src, len);
				619	}
				620
				621	/*
				622	* Complete a fast commit by writing tail tag.
				623	*
				624	* Writing tail tag marks the end of a fast commit. In order to guarantee
				625	* atomicity, after writing tail tag, even if there's space remaining
				626	* in the block, next commit shouldn't use it. That's why tail tag
				627	* has the length as that of the remaining space on the block.
				628	*/
				629	static int ext4_fc_write_tail(struct super_block *sb, u32 crc)
				630	{
				631	struct ext4_sb_info *sbi = EXT4_SB(sb);
				632	struct ext4_fc_tl tl;
				633	struct ext4_fc_tail tail;
				634	int off, bsize = sbi->s_journal->j_blocksize;
				635	u8 *dst;
				636
				637	/*
				638	* ext4_fc_reserve_space takes care of allocating an extra block if
				639	* there's no enough space on this block for accommodating this tail.
				640	*/
				641	dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(tail), &crc);
				642	if (!dst)
				643	return -ENOSPC;
				644
				645	off = sbi->s_fc_bytes % bsize;
				646
				647	tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_TAIL);
				648	tl.fc_len = cpu_to_le16(bsize - off - 1 + sizeof(struct ext4_fc_tail));
				649	sbi->s_fc_bytes = round_up(sbi->s_fc_bytes, bsize);
				650
				651	ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), &crc);
				652	dst += sizeof(tl);
				653	tail.fc_tid = cpu_to_le32(sbi->s_journal->j_running_transaction->t_tid);
				654	ext4_fc_memcpy(sb, dst, &tail.fc_tid, sizeof(tail.fc_tid), &crc);
				655	dst += sizeof(tail.fc_tid);
				656	tail.fc_crc = cpu_to_le32(crc);
				657	ext4_fc_memcpy(sb, dst, &tail.fc_crc, sizeof(tail.fc_crc), NULL);
				658
				659	ext4_fc_submit_bh(sb);
				660
				661	return 0;
				662	}
				663
				664	/*
				665	* Adds tag, length, value and updates CRC. Returns true if tlv was added.
				666	* Returns false if there's not enough space.
				667	*/
				668	static bool ext4_fc_add_tlv(struct super_block sb, u16 tag, u16 len, u8 val,
				669	u32 *crc)
				670	{
				671	struct ext4_fc_tl tl;
				672	u8 *dst;
				673
				674	dst = ext4_fc_reserve_space(sb, sizeof(tl) + len, crc);
				675	if (!dst)
				676	return false;
				677
				678	tl.fc_tag = cpu_to_le16(tag);
				679	tl.fc_len = cpu_to_le16(len);
				680
				681	ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc);
				682	ext4_fc_memcpy(sb, dst + sizeof(tl), val, len, crc);
				683
				684	return true;
				685	}
				686
				687	/* Same as above, but adds dentry tlv. */
				688	static bool ext4_fc_add_dentry_tlv(struct super_block *sb, u16 tag,
				689	int parent_ino, int ino, int dlen,
				690	const unsigned char *dname,
				691	u32 *crc)
				692	{
				693	struct ext4_fc_dentry_info fcd;
				694	struct ext4_fc_tl tl;
				695	u8 *dst = ext4_fc_reserve_space(sb, sizeof(tl) + sizeof(fcd) + dlen,
				696	crc);
				697
				698	if (!dst)
				699	return false;
				700
				701	fcd.fc_parent_ino = cpu_to_le32(parent_ino);
				702	fcd.fc_ino = cpu_to_le32(ino);
				703	tl.fc_tag = cpu_to_le16(tag);
				704	tl.fc_len = cpu_to_le16(sizeof(fcd) + dlen);
				705	ext4_fc_memcpy(sb, dst, &tl, sizeof(tl), crc);
				706	dst += sizeof(tl);
				707	ext4_fc_memcpy(sb, dst, &fcd, sizeof(fcd), crc);
				708	dst += sizeof(fcd);
				709	ext4_fc_memcpy(sb, dst, dname, dlen, crc);
				710	dst += dlen;
				711
				712	return true;
				713	}
				714
				715	/*
				716	* Writes inode in the fast commit space under TLV with tag @tag.
				717	* Returns 0 on success, error on failure.
				718	*/
				719	static int ext4_fc_write_inode(struct inode inode, u32 crc)
				720	{
				721	struct ext4_inode_info *ei = EXT4_I(inode);
				722	int inode_len = EXT4_GOOD_OLD_INODE_SIZE;
				723	int ret;
				724	struct ext4_iloc iloc;
				725	struct ext4_fc_inode fc_inode;
				726	struct ext4_fc_tl tl;
				727	u8 *dst;
				728
				729	ret = ext4_get_inode_loc(inode, &iloc);
				730	if (ret)
				731	return ret;
				732
				733	if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE)
				734	inode_len += ei->i_extra_isize;
				735
				736	fc_inode.fc_ino = cpu_to_le32(inode->i_ino);
				737	tl.fc_tag = cpu_to_le16(EXT4_FC_TAG_INODE);
				738	tl.fc_len = cpu_to_le16(inode_len + sizeof(fc_inode.fc_ino));
				739
				740	dst = ext4_fc_reserve_space(inode->i_sb,
				741	sizeof(tl) + inode_len + sizeof(fc_inode.fc_ino), crc);
				742	if (!dst)
				743	return -ECANCELED;
				744
				745	if (!ext4_fc_memcpy(inode->i_sb, dst, &tl, sizeof(tl), crc))
				746	return -ECANCELED;
				747	dst += sizeof(tl);
				748	if (!ext4_fc_memcpy(inode->i_sb, dst, &fc_inode, sizeof(fc_inode), crc))
				749	return -ECANCELED;
				750	dst += sizeof(fc_inode);
				751	if (!ext4_fc_memcpy(inode->i_sb, dst, (u8 *)ext4_raw_inode(&iloc),
				752	inode_len, crc))
				753	return -ECANCELED;
				754
				755	return 0;
				756	}
				757
				758	/*
				759	* Writes updated data ranges for the inode in question. Updates CRC.
				760	* Returns 0 on success, error otherwise.
				761	*/
				762	static int ext4_fc_write_inode_data(struct inode inode, u32 crc)
				763	{
				764	ext4_lblk_t old_blk_size, cur_lblk_off, new_blk_size;
				765	struct ext4_inode_info *ei = EXT4_I(inode);
				766	struct ext4_map_blocks map;
				767	struct ext4_fc_add_range fc_ext;
				768	struct ext4_fc_del_range lrange;
				769	struct ext4_extent *ex;
				770	int ret;
				771
				772	mutex_lock(&ei->i_fc_lock);
				773	if (ei->i_fc_lblk_len == 0) {
				774	mutex_unlock(&ei->i_fc_lock);
				775	return 0;
				776	}
				777	old_blk_size = ei->i_fc_lblk_start;
				778	new_blk_size = ei->i_fc_lblk_start + ei->i_fc_lblk_len - 1;
				779	ei->i_fc_lblk_len = 0;
				780	mutex_unlock(&ei->i_fc_lock);
				781
				782	cur_lblk_off = old_blk_size;
				783	jbd_debug(1, "%s: will try writing %d to %d for inode %ld\n",
				784	__func__, cur_lblk_off, new_blk_size, inode->i_ino);
				785
				786	while (cur_lblk_off <= new_blk_size) {
				787	map.m_lblk = cur_lblk_off;
				788	map.m_len = new_blk_size - cur_lblk_off + 1;
				789	ret = ext4_map_blocks(NULL, inode, &map, 0);
				790	if (ret < 0)
				791	return -ECANCELED;
				792
				793	if (map.m_len == 0) {
				794	cur_lblk_off++;
				795	continue;
				796	}
				797
				798	if (ret == 0) {
				799	lrange.fc_ino = cpu_to_le32(inode->i_ino);
				800	lrange.fc_lblk = cpu_to_le32(map.m_lblk);
				801	lrange.fc_len = cpu_to_le32(map.m_len);
				802	if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_DEL_RANGE,
				803	sizeof(lrange), (u8 *)&lrange, crc))
				804	return -ENOSPC;
				805	} else {
				806	fc_ext.fc_ino = cpu_to_le32(inode->i_ino);
				807	ex = (struct ext4_extent *)&fc_ext.fc_ex;
				808	ex->ee_block = cpu_to_le32(map.m_lblk);
				809	ex->ee_len = cpu_to_le16(map.m_len);
				810	ext4_ext_store_pblock(ex, map.m_pblk);
				811	if (map.m_flags & EXT4_MAP_UNWRITTEN)
				812	ext4_ext_mark_unwritten(ex);
				813	else
				814	ext4_ext_mark_initialized(ex);
				815	if (!ext4_fc_add_tlv(inode->i_sb, EXT4_FC_TAG_ADD_RANGE,
				816	sizeof(fc_ext), (u8 *)&fc_ext, crc))
				817	return -ENOSPC;
				818	}
				819
				820	cur_lblk_off += map.m_len;
				821	}
				822
				823	return 0;
				824	}
				825
				826
				827	/* Submit data for all the fast commit inodes */
				828	static int ext4_fc_submit_inode_data_all(journal_t *journal)
				829	{
				830	struct super_block sb = (struct super_block )(journal->j_private);
				831	struct ext4_sb_info *sbi = EXT4_SB(sb);
				832	struct ext4_inode_info *ei;
				833	struct list_head *pos;
				834	int ret = 0;
				835
				836	spin_lock(&sbi->s_fc_lock);
				837	sbi->s_mount_state \|= EXT4_FC_COMMITTING;
				838	list_for_each(pos, &sbi->s_fc_q[FC_Q_MAIN]) {
				839	ei = list_entry(pos, struct ext4_inode_info, i_fc_list);
				840	ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING);
				841	while (atomic_read(&ei->i_fc_updates)) {
				842	DEFINE_WAIT(wait);
				843
				844	prepare_to_wait(&ei->i_fc_wait, &wait,
				845	TASK_UNINTERRUPTIBLE);
				846	if (atomic_read(&ei->i_fc_updates)) {
				847	spin_unlock(&sbi->s_fc_lock);
				848	schedule();
				849	spin_lock(&sbi->s_fc_lock);
				850	}
				851	finish_wait(&ei->i_fc_wait, &wait);
				852	}
				853	spin_unlock(&sbi->s_fc_lock);
				854	ret = jbd2_submit_inode_data(ei->jinode);
				855	if (ret)
				856	return ret;
				857	spin_lock(&sbi->s_fc_lock);
				858	}
				859	spin_unlock(&sbi->s_fc_lock);
				860
				861	return ret;
				862	}
				863
				864	/* Wait for completion of data for all the fast commit inodes */
				865	static int ext4_fc_wait_inode_data_all(journal_t *journal)
				866	{
				867	struct super_block sb = (struct super_block )(journal->j_private);
				868	struct ext4_sb_info *sbi = EXT4_SB(sb);
				869	struct ext4_inode_info pos, n;
				870	int ret = 0;
				871
				872	spin_lock(&sbi->s_fc_lock);
				873	list_for_each_entry_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
				874	if (!ext4_test_inode_state(&pos->vfs_inode,
				875	EXT4_STATE_FC_COMMITTING))
				876	continue;
				877	spin_unlock(&sbi->s_fc_lock);
				878
				879	ret = jbd2_wait_inode_data(journal, pos->jinode);
				880	if (ret)
				881	return ret;
				882	spin_lock(&sbi->s_fc_lock);
				883	}
				884	spin_unlock(&sbi->s_fc_lock);
				885
				886	return 0;
				887	}
				888
				889	/* Commit all the directory entry updates */
				890	static int ext4_fc_commit_dentry_updates(journal_t journal, u32 crc)
				891	{
				892	struct super_block sb = (struct super_block )(journal->j_private);
				893	struct ext4_sb_info *sbi = EXT4_SB(sb);
				894	struct ext4_fc_dentry_update *fc_dentry;
				895	struct inode *inode;
				896	struct list_head pos, n, fcd_pos, fcd_n;
				897	struct ext4_inode_info *ei;
				898	int ret;
				899
				900	if (list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN]))
				901	return 0;
				902	list_for_each_safe(fcd_pos, fcd_n, &sbi->s_fc_dentry_q[FC_Q_MAIN]) {
				903	fc_dentry = list_entry(fcd_pos, struct ext4_fc_dentry_update,
				904	fcd_list);
				905	if (fc_dentry->fcd_op != EXT4_FC_TAG_CREAT) {
				906	spin_unlock(&sbi->s_fc_lock);
				907	if (!ext4_fc_add_dentry_tlv(
				908	sb, fc_dentry->fcd_op,
				909	fc_dentry->fcd_parent, fc_dentry->fcd_ino,
				910	fc_dentry->fcd_name.len,
				911	fc_dentry->fcd_name.name, crc)) {
				912	ret = -ENOSPC;
				913	goto lock_and_exit;
				914	}
				915	spin_lock(&sbi->s_fc_lock);
				916	continue;
				917	}
				918
				919	inode = NULL;
				920	list_for_each_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN]) {
				921	ei = list_entry(pos, struct ext4_inode_info, i_fc_list);
				922	if (ei->vfs_inode.i_ino == fc_dentry->fcd_ino) {
				923	inode = &ei->vfs_inode;
				924	break;
				925	}
				926	}
				927	/*
				928	* If we don't find inode in our list, then it was deleted,
				929	* in which case, we don't need to record it's create tag.
				930	*/
				931	if (!inode)
				932	continue;
				933	spin_unlock(&sbi->s_fc_lock);
				934
				935	/*
				936	* We first write the inode and then the create dirent. This
				937	* allows the recovery code to create an unnamed inode first
				938	* and then link it to a directory entry. This allows us
				939	* to use namei.c routines almost as is and simplifies
				940	* the recovery code.
				941	*/
				942	ret = ext4_fc_write_inode(inode, crc);
				943	if (ret)
				944	goto lock_and_exit;
				945
				946	ret = ext4_fc_write_inode_data(inode, crc);
				947	if (ret)
				948	goto lock_and_exit;
				949
				950	if (!ext4_fc_add_dentry_tlv(
				951	sb, fc_dentry->fcd_op,
				952	fc_dentry->fcd_parent, fc_dentry->fcd_ino,
				953	fc_dentry->fcd_name.len,
				954	fc_dentry->fcd_name.name, crc)) {
				955	spin_lock(&sbi->s_fc_lock);
				956	ret = -ENOSPC;
				957	goto lock_and_exit;
				958	}
				959
				960	spin_lock(&sbi->s_fc_lock);
				961	}
				962	return 0;
				963	lock_and_exit:
				964	spin_lock(&sbi->s_fc_lock);
				965	return ret;
				966	}
				967
				968	static int ext4_fc_perform_commit(journal_t *journal)
				969	{
				970	struct super_block sb = (struct super_block )(journal->j_private);
				971	struct ext4_sb_info *sbi = EXT4_SB(sb);
				972	struct ext4_inode_info *iter;
				973	struct ext4_fc_head head;
				974	struct list_head *pos;
				975	struct inode *inode;
				976	struct blk_plug plug;
				977	int ret = 0;
				978	u32 crc = 0;
				979
				980	ret = ext4_fc_submit_inode_data_all(journal);
				981	if (ret)
				982	return ret;
				983
				984	ret = ext4_fc_wait_inode_data_all(journal);
				985	if (ret)
				986	return ret;
				987
				988	blk_start_plug(&plug);
				989	if (sbi->s_fc_bytes == 0) {
				990	/*
				991	* Add a head tag only if this is the first fast commit
				992	* in this TID.
				993	*/
				994	head.fc_features = cpu_to_le32(EXT4_FC_SUPPORTED_FEATURES);
				995	head.fc_tid = cpu_to_le32(
				996	sbi->s_journal->j_running_transaction->t_tid);
				997	if (!ext4_fc_add_tlv(sb, EXT4_FC_TAG_HEAD, sizeof(head),
				998	(u8 *)&head, &crc))
				999	goto out;
				1000	}
				1001
				1002	spin_lock(&sbi->s_fc_lock);
				1003	ret = ext4_fc_commit_dentry_updates(journal, &crc);
				1004	if (ret) {
				1005	spin_unlock(&sbi->s_fc_lock);
				1006	goto out;
				1007	}
				1008
				1009	list_for_each(pos, &sbi->s_fc_q[FC_Q_MAIN]) {
				1010	iter = list_entry(pos, struct ext4_inode_info, i_fc_list);
				1011	inode = &iter->vfs_inode;
				1012	if (!ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING))
				1013	continue;
				1014
				1015	spin_unlock(&sbi->s_fc_lock);
				1016	ret = ext4_fc_write_inode_data(inode, &crc);
				1017	if (ret)
				1018	goto out;
				1019	ret = ext4_fc_write_inode(inode, &crc);
				1020	if (ret)
				1021	goto out;
				1022	spin_lock(&sbi->s_fc_lock);
				1023	EXT4_I(inode)->i_fc_committed_subtid =
				1024	atomic_read(&sbi->s_fc_subtid);
				1025	}
				1026	spin_unlock(&sbi->s_fc_lock);
				1027
				1028	ret = ext4_fc_write_tail(sb, crc);
				1029
				1030	out:
				1031	blk_finish_plug(&plug);
				1032	return ret;
				1033	}
				1034
				1035	/*
				1036	* The main commit entry point. Performs a fast commit for transaction
				1037	* commit_tid if needed. If it's not possible to perform a fast commit
				1038	* due to various reasons, we fall back to full commit. Returns 0
				1039	* on success, error otherwise.
				1040	*/
				1041	int ext4_fc_commit(journal_t *journal, tid_t commit_tid)
				1042	{
				1043	struct super_block sb = (struct super_block )(journal->j_private);
				1044	struct ext4_sb_info *sbi = EXT4_SB(sb);
				1045	int nblks = 0, ret, bsize = journal->j_blocksize;
				1046	int subtid = atomic_read(&sbi->s_fc_subtid);
				1047	int reason = EXT4_FC_REASON_OK, fc_bufs_before = 0;
				1048	ktime_t start_time, commit_time;
				1049
				1050	trace_ext4_fc_commit_start(sb);
				1051
				1052	start_time = ktime_get();
				1053
				1054	if (!test_opt2(sb, JOURNAL_FAST_COMMIT) \|\|
				1055	(ext4_fc_is_ineligible(sb))) {
				1056	reason = EXT4_FC_REASON_INELIGIBLE;
				1057	goto out;
				1058	}
				1059
				1060	restart_fc:
				1061	ret = jbd2_fc_begin_commit(journal, commit_tid);
				1062	if (ret == -EALREADY) {
				1063	/* There was an ongoing commit, check if we need to restart */
				1064	if (atomic_read(&sbi->s_fc_subtid) <= subtid &&
				1065	commit_tid > journal->j_commit_sequence)
				1066	goto restart_fc;
				1067	reason = EXT4_FC_REASON_ALREADY_COMMITTED;
				1068	goto out;
				1069	} else if (ret) {
				1070	sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
				1071	reason = EXT4_FC_REASON_FC_START_FAILED;
				1072	goto out;
				1073	}
				1074
				1075	fc_bufs_before = (sbi->s_fc_bytes + bsize - 1) / bsize;
				1076	ret = ext4_fc_perform_commit(journal);
				1077	if (ret < 0) {
				1078	sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
				1079	reason = EXT4_FC_REASON_FC_FAILED;
				1080	goto out;
				1081	}
				1082	nblks = (sbi->s_fc_bytes + bsize - 1) / bsize - fc_bufs_before;
				1083	ret = jbd2_fc_wait_bufs(journal, nblks);
				1084	if (ret < 0) {
				1085	sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
				1086	reason = EXT4_FC_REASON_FC_FAILED;
				1087	goto out;
				1088	}
				1089	atomic_inc(&sbi->s_fc_subtid);
				1090	jbd2_fc_end_commit(journal);
				1091	out:
				1092	/* Has any ineligible update happened since we started? */
				1093	if (reason == EXT4_FC_REASON_OK && ext4_fc_is_ineligible(sb)) {
				1094	sbi->s_fc_stats.fc_ineligible_reason_count[EXT4_FC_COMMIT_FAILED]++;
				1095	reason = EXT4_FC_REASON_INELIGIBLE;
				1096	}
				1097
				1098	spin_lock(&sbi->s_fc_lock);
				1099	if (reason != EXT4_FC_REASON_OK &&
				1100	reason != EXT4_FC_REASON_ALREADY_COMMITTED) {
				1101	sbi->s_fc_stats.fc_ineligible_commits++;
				1102	} else {
				1103	sbi->s_fc_stats.fc_num_commits++;
				1104	sbi->s_fc_stats.fc_numblks += nblks;
				1105	}
				1106	spin_unlock(&sbi->s_fc_lock);
				1107	nblks = (reason == EXT4_FC_REASON_OK) ? nblks : 0;
				1108	trace_ext4_fc_commit_stop(sb, nblks, reason);
				1109	commit_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
				1110	/*
				1111	* weight the commit time higher than the average time so we don't
				1112	* react too strongly to vast changes in the commit time
				1113	*/
				1114	if (likely(sbi->s_fc_avg_commit_time))
				1115	sbi->s_fc_avg_commit_time = (commit_time +
				1116	sbi->s_fc_avg_commit_time * 3) / 4;
				1117	else
				1118	sbi->s_fc_avg_commit_time = commit_time;
				1119	jbd_debug(1,
				1120	"Fast commit ended with blks = %d, reason = %d, subtid - %d",
				1121	nblks, reason, subtid);
				1122	if (reason == EXT4_FC_REASON_FC_FAILED)
				1123	return jbd2_fc_end_commit_fallback(journal, commit_tid);
				1124	if (reason == EXT4_FC_REASON_FC_START_FAILED \|\|
				1125	reason == EXT4_FC_REASON_INELIGIBLE)
				1126	return jbd2_complete_transaction(journal, commit_tid);
				1127	return 0;
				1128	}
				1129
Harshad Shirwadkar	ff780b9	2020-10-15 13:37:56 -0700	[diff] [blame]	1130	/*
				1131	* Fast commit cleanup routine. This is called after every fast commit and
				1132	* full commit. full is true if we are called after a full commit.
				1133	*/
				1134	static void ext4_fc_cleanup(journal_t *journal, int full)
				1135	{
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	1136	struct super_block *sb = journal->j_private;
				1137	struct ext4_sb_info *sbi = EXT4_SB(sb);
				1138	struct ext4_inode_info *iter;
				1139	struct ext4_fc_dentry_update *fc_dentry;
				1140	struct list_head pos, n;
				1141
				1142	if (full && sbi->s_fc_bh)
				1143	sbi->s_fc_bh = NULL;
				1144
				1145	jbd2_fc_release_bufs(journal);
				1146
				1147	spin_lock(&sbi->s_fc_lock);
				1148	list_for_each_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN]) {
				1149	iter = list_entry(pos, struct ext4_inode_info, i_fc_list);
				1150	list_del_init(&iter->i_fc_list);
				1151	ext4_clear_inode_state(&iter->vfs_inode,
				1152	EXT4_STATE_FC_COMMITTING);
				1153	ext4_fc_reset_inode(&iter->vfs_inode);
				1154	/* Make sure EXT4_STATE_FC_COMMITTING bit is clear */
				1155	smp_mb();
				1156	#if (BITS_PER_LONG < 64)
				1157	wake_up_bit(&iter->i_state_flags, EXT4_STATE_FC_COMMITTING);
				1158	#else
				1159	wake_up_bit(&iter->i_flags, EXT4_STATE_FC_COMMITTING);
				1160	#endif
				1161	}
				1162
				1163	while (!list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN])) {
				1164	fc_dentry = list_first_entry(&sbi->s_fc_dentry_q[FC_Q_MAIN],
				1165	struct ext4_fc_dentry_update,
				1166	fcd_list);
				1167	list_del_init(&fc_dentry->fcd_list);
				1168	spin_unlock(&sbi->s_fc_lock);
				1169
				1170	if (fc_dentry->fcd_name.name &&
				1171	fc_dentry->fcd_name.len > DNAME_INLINE_LEN)
				1172	kfree(fc_dentry->fcd_name.name);
				1173	kmem_cache_free(ext4_fc_dentry_cachep, fc_dentry);
				1174	spin_lock(&sbi->s_fc_lock);
				1175	}
				1176
				1177	list_splice_init(&sbi->s_fc_dentry_q[FC_Q_STAGING],
				1178	&sbi->s_fc_dentry_q[FC_Q_MAIN]);
				1179	list_splice_init(&sbi->s_fc_q[FC_Q_STAGING],
				1180	&sbi->s_fc_q[FC_Q_STAGING]);
				1181
				1182	sbi->s_mount_state &= ~EXT4_FC_COMMITTING;
				1183	sbi->s_mount_state &= ~EXT4_FC_INELIGIBLE;
				1184
				1185	if (full)
				1186	sbi->s_fc_bytes = 0;
				1187	spin_unlock(&sbi->s_fc_lock);
				1188	trace_ext4_fc_stats(sb);
Harshad Shirwadkar	ff780b9	2020-10-15 13:37:56 -0700	[diff] [blame]	1189	}
Harshad Shirwadkar	6866d7b	2020-10-15 13:37:55 -0700	[diff] [blame]	1190
Harshad Shirwadkar	5b849b5	2020-10-15 13:37:58 -0700	[diff] [blame^]	1191	/*
				1192	* Main recovery path entry point.
				1193	*/
				1194	static int ext4_fc_replay(journal_t journal, struct buffer_head bh,
				1195	enum passtype pass, int off, tid_t expected_tid)
				1196	{
				1197	return 0;
				1198	}
				1199
Harshad Shirwadkar	6866d7b	2020-10-15 13:37:55 -0700	[diff] [blame]	1200	void ext4_fc_init(struct super_block sb, journal_t journal)
				1201	{
Harshad Shirwadkar	5b849b5	2020-10-15 13:37:58 -0700	[diff] [blame^]	1202	/*
				1203	* We set replay callback even if fast commit disabled because we may
				1204	* could still have fast commit blocks that need to be replayed even if
				1205	* fast commit has now been turned off.
				1206	*/
				1207	journal->j_fc_replay_callback = ext4_fc_replay;
Harshad Shirwadkar	6866d7b	2020-10-15 13:37:55 -0700	[diff] [blame]	1208	if (!test_opt2(sb, JOURNAL_FAST_COMMIT))
				1209	return;
Harshad Shirwadkar	ff780b9	2020-10-15 13:37:56 -0700	[diff] [blame]	1210	journal->j_fc_cleanup_callback = ext4_fc_cleanup;
Harshad Shirwadkar	6866d7b	2020-10-15 13:37:55 -0700	[diff] [blame]	1211	if (jbd2_fc_init(journal, EXT4_NUM_FC_BLKS)) {
				1212	pr_warn("Error while enabling fast commits, turning off.");
				1213	ext4_clear_feature_fast_commit(sb);
				1214	}
				1215	}
Harshad Shirwadkar	aa75f4d	2020-10-15 13:37:57 -0700	[diff] [blame]	1216
				1217	int __init ext4_fc_init_dentry_cache(void)
				1218	{
				1219	ext4_fc_dentry_cachep = KMEM_CACHE(ext4_fc_dentry_update,
				1220	SLAB_RECLAIM_ACCOUNT);
				1221
				1222	if (ext4_fc_dentry_cachep == NULL)
				1223	return -ENOMEM;
				1224
				1225	return 0;
				1226	}