Blame - arch/x86/mm/mpx.c - linux-5.10

blob: 0d1c47cbbdd68b3a969f6cd1ac347251f0812e78 [file] [log] [blame]

Greg Kroah-Hartman	b244131	2017-11-01 15:07:57 +0100	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
Qiaowei Ren	57319d8	2014-11-14 07:18:27 -0800	[diff] [blame]	2	/*
				3	* mpx.c - Memory Protection eXtensions
				4	*
				5	* Copyright (c) 2014, Intel Corporation.
				6	* Qiaowei Ren <qiaowei.ren@intel.com>
				7	* Dave Hansen <dave.hansen@intel.com>
				8	*/
				9	#include <linux/kernel.h>
Dave Hansen	fcc7ffd	2014-11-14 07:18:28 -0800	[diff] [blame]	10	#include <linux/slab.h>
Ingo Molnar	589ee62	2017-02-04 00:16:44 +0100	[diff] [blame]	11	#include <linux/mm_types.h>
Michael S. Tsirkin	e844f2c	2019-02-08 01:02:53 -0500	[diff] [blame]	12	#include <linux/mman.h>
Qiaowei Ren	57319d8	2014-11-14 07:18:27 -0800	[diff] [blame]	13	#include <linux/syscalls.h>
				14	#include <linux/sched/sysctl.h>
				15
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	16	#include <asm/insn.h>
Ricardo Neri	32542ee	2017-10-27 13:25:36 -0700	[diff] [blame]	17	#include <asm/insn-eval.h>
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	18	#include <asm/mmu_context.h>
Qiaowei Ren	57319d8	2014-11-14 07:18:27 -0800	[diff] [blame]	19	#include <asm/mpx.h>
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	20	#include <asm/processor.h>
Ingo Molnar	78f7f1e	2015-04-24 02:54:44 +0200	[diff] [blame]	21	#include <asm/fpu/internal.h>
Qiaowei Ren	57319d8	2014-11-14 07:18:27 -0800	[diff] [blame]	22
Dave Hansen	e7126cf	2015-06-07 11:37:03 -0700	[diff] [blame]	23	#define CREATE_TRACE_POINTS
				24	#include <asm/trace/mpx.h>
				25
Dave Hansen	613fcb7	2015-06-07 11:37:05 -0700	[diff] [blame]	26	static inline unsigned long mpx_bd_size_bytes(struct mm_struct *mm)
				27	{
				28	if (is_64bit_mm(mm))
				29	return MPX_BD_SIZE_BYTES_64;
				30	else
				31	return MPX_BD_SIZE_BYTES_32;
				32	}
				33
				34	static inline unsigned long mpx_bt_size_bytes(struct mm_struct *mm)
				35	{
				36	if (is_64bit_mm(mm))
				37	return MPX_BT_SIZE_BYTES_64;
				38	else
				39	return MPX_BT_SIZE_BYTES_32;
				40	}
				41
Qiaowei Ren	57319d8	2014-11-14 07:18:27 -0800	[diff] [blame]	42	/*
				43	* This is really a simplified "vm_mmap". it only handles MPX
				44	* bounds tables (the bounds directory is user-allocated).
Qiaowei Ren	57319d8	2014-11-14 07:18:27 -0800	[diff] [blame]	45	*/
				46	static unsigned long mpx_mmap(unsigned long len)
				47	{
Qiaowei Ren	57319d8	2014-11-14 07:18:27 -0800	[diff] [blame]	48	struct mm_struct *mm = current->mm;
Oleg Nesterov	1fcfd8d	2015-09-09 15:39:29 -0700	[diff] [blame]	49	unsigned long addr, populate;
Qiaowei Ren	57319d8	2014-11-14 07:18:27 -0800	[diff] [blame]	50
Dave Hansen	eb099e5	2015-06-07 11:37:02 -0700	[diff] [blame]	51	/* Only bounds table can be allocated here */
Dave Hansen	613fcb7	2015-06-07 11:37:05 -0700	[diff] [blame]	52	if (len != mpx_bt_size_bytes(mm))
Qiaowei Ren	57319d8	2014-11-14 07:18:27 -0800	[diff] [blame]	53	return -EINVAL;
				54
				55	down_write(&mm->mmap_sem);
Oleg Nesterov	1fcfd8d	2015-09-09 15:39:29 -0700	[diff] [blame]	56	addr = do_mmap(NULL, 0, len, PROT_READ \| PROT_WRITE,
Mike Rapoport	897ab3e	2017-02-24 14:58:22 -0800	[diff] [blame]	57	MAP_ANONYMOUS \| MAP_PRIVATE, VM_MPX, 0, &populate, NULL);
Qiaowei Ren	57319d8	2014-11-14 07:18:27 -0800	[diff] [blame]	58	up_write(&mm->mmap_sem);
Oleg Nesterov	1fcfd8d	2015-09-09 15:39:29 -0700	[diff] [blame]	59	if (populate)
				60	mm_populate(addr, populate);
				61
				62	return addr;
Qiaowei Ren	57319d8	2014-11-14 07:18:27 -0800	[diff] [blame]	63	}
Dave Hansen	fcc7ffd	2014-11-14 07:18:28 -0800	[diff] [blame]	64
Dave Hansen	fcc7ffd	2014-11-14 07:18:28 -0800	[diff] [blame]	65	static int mpx_insn_decode(struct insn *insn,
				66	struct pt_regs *regs)
				67	{
				68	unsigned char buf[MAX_INSN_SIZE];
				69	int x86_64 = !test_thread_flag(TIF_IA32);
				70	int not_copied;
				71	int nr_copied;
				72
				73	not_copied = copy_from_user(buf, (void __user *)regs->ip, sizeof(buf));
				74	nr_copied = sizeof(buf) - not_copied;
				75	/*
				76	* The decoder _should_ fail nicely if we pass it a short buffer.
				77	* But, let's not depend on that implementation detail. If we
				78	* did not get anything, just error out now.
				79	*/
				80	if (!nr_copied)
				81	return -EFAULT;
				82	insn_init(insn, buf, nr_copied, x86_64);
				83	insn_get_length(insn);
				84	/*
				85	* copy_from_user() tries to get as many bytes as we could see in
				86	* the largest possible instruction. If the instruction we are
				87	* after is shorter than that _and_ we attempt to copy from
				88	* something unreadable, we might get a short read. This is OK
				89	* as long as the read did not stop in the middle of the
				90	* instruction. Check to see if we got a partial instruction.
				91	*/
				92	if (nr_copied < insn->length)
				93	return -EFAULT;
				94
				95	insn_get_opcode(insn);
				96	/*
				97	* We only _really_ need to decode bndcl/bndcn/bndcu
				98	* Error out on anything else.
				99	*/
				100	if (insn->opcode.bytes[0] != 0x0f)
				101	goto bad_opcode;
				102	if ((insn->opcode.bytes[1] != 0x1a) &&
				103	(insn->opcode.bytes[1] != 0x1b))
				104	goto bad_opcode;
				105
				106	return 0;
				107	bad_opcode:
				108	return -EINVAL;
				109	}
				110
				111	/*
				112	* If a bounds overflow occurs then a #BR is generated. This
				113	* function decodes MPX instructions to get violation address
				114	* and set this address into extended struct siginfo.
				115	*
				116	* Note that this is not a super precise way of doing this.
				117	* Userspace could have, by the time we get here, written
				118	* anything it wants in to the instructions. We can not
				119	* trust anything about it. They might not be valid
				120	* instructions or might encode invalid registers, etc...
Dave Hansen	fcc7ffd	2014-11-14 07:18:28 -0800	[diff] [blame]	121	*/
Eric W. Biederman	8d68fa0	2018-01-03 19:22:04 -0600	[diff] [blame]	122	int mpx_fault_info(struct mpx_fault_info info, struct pt_regs regs)
Dave Hansen	fcc7ffd	2014-11-14 07:18:28 -0800	[diff] [blame]	123	{
Dave Hansen	1126cb45	2015-09-02 16:31:29 -0700	[diff] [blame]	124	const struct mpx_bndreg_state *bndregs;
				125	const struct mpx_bndreg *bndreg;
Dave Hansen	fcc7ffd	2014-11-14 07:18:28 -0800	[diff] [blame]	126	struct insn insn;
				127	uint8_t bndregno;
				128	int err;
Dave Hansen	fcc7ffd	2014-11-14 07:18:28 -0800	[diff] [blame]	129
				130	err = mpx_insn_decode(&insn, regs);
				131	if (err)
				132	goto err_out;
				133
				134	/*
				135	* We know at this point that we are only dealing with
				136	* MPX instructions.
				137	*/
				138	insn_get_modrm(&insn);
				139	bndregno = X86_MODRM_REG(insn.modrm.value);
				140	if (bndregno > 3) {
				141	err = -EINVAL;
				142	goto err_out;
				143	}
Dave Hansen	a84eeaa	2015-06-07 11:37:01 -0700	[diff] [blame]	144	/* get bndregs field from current task's xsave area */
Sebastian Andrzej Siewior	abd16d6	2019-04-03 18:41:40 +0200	[diff] [blame]	145	bndregs = get_xsave_field_ptr(XFEATURE_BNDREGS);
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	146	if (!bndregs) {
				147	err = -EINVAL;
				148	goto err_out;
				149	}
				150	/* now go select the individual register in the set of 4 */
Dave Hansen	1126cb45	2015-09-02 16:31:29 -0700	[diff] [blame]	151	bndreg = &bndregs->bndreg[bndregno];
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	152
Dave Hansen	fcc7ffd	2014-11-14 07:18:28 -0800	[diff] [blame]	153	/*
				154	* The registers are always 64-bit, but the upper 32
				155	* bits are ignored in 32-bit mode. Also, note that the
				156	* upper bounds are architecturally represented in 1's
				157	* complement form.
				158	*
				159	* The 'unsigned long' cast is because the compiler
				160	* complains when casting from integers to different-size
				161	* pointers.
				162	*/
Eric W. Biederman	8d68fa0	2018-01-03 19:22:04 -0600	[diff] [blame]	163	info->lower = (void __user *)(unsigned long)bndreg->lower_bound;
				164	info->upper = (void __user *)(unsigned long)~bndreg->upper_bound;
				165	info->addr = insn_get_addr_ref(&insn, regs);
				166
Dave Hansen	fcc7ffd	2014-11-14 07:18:28 -0800	[diff] [blame]	167	/*
				168	* We were not able to extract an address from the instruction,
				169	* probably because there was something invalid in it.
				170	*/
Eric W. Biederman	8d68fa0	2018-01-03 19:22:04 -0600	[diff] [blame]	171	if (info->addr == (void __user *)-1) {
Dave Hansen	fcc7ffd	2014-11-14 07:18:28 -0800	[diff] [blame]	172	err = -EINVAL;
				173	goto err_out;
				174	}
Eric W. Biederman	8d68fa0	2018-01-03 19:22:04 -0600	[diff] [blame]	175	trace_mpx_bounds_register_exception(info->addr, bndreg);
				176	return 0;
Dave Hansen	fcc7ffd	2014-11-14 07:18:28 -0800	[diff] [blame]	177	err_out:
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	178	/* info might be NULL, but kfree() handles that */
Eric W. Biederman	8d68fa0	2018-01-03 19:22:04 -0600	[diff] [blame]	179	return err;
Dave Hansen	fcc7ffd	2014-11-14 07:18:28 -0800	[diff] [blame]	180	}
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	181
Dave Hansen	46a6e0c	2015-06-07 11:37:02 -0700	[diff] [blame]	182	static __user void *mpx_get_bounds_dir(void)
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	183	{
Dave Hansen	1126cb45	2015-09-02 16:31:29 -0700	[diff] [blame]	184	const struct mpx_bndcsr *bndcsr;
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	185
				186	if (!cpu_feature_enabled(X86_FEATURE_MPX))
				187	return MPX_INVALID_BOUNDS_DIR;
				188
				189	/*
				190	* The bounds directory pointer is stored in a register
				191	* only accessible if we first do an xsave.
				192	*/
Sebastian Andrzej Siewior	abd16d6	2019-04-03 18:41:40 +0200	[diff] [blame]	193	bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR);
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	194	if (!bndcsr)
				195	return MPX_INVALID_BOUNDS_DIR;
				196
				197	/*
				198	* Make sure the register looks valid by checking the
				199	* enable bit.
				200	*/
				201	if (!(bndcsr->bndcfgu & MPX_BNDCFG_ENABLE_FLAG))
				202	return MPX_INVALID_BOUNDS_DIR;
				203
				204	/*
				205	* Lastly, mask off the low bits used for configuration
				206	* flags, and return the address of the bounds table.
				207	*/
				208	return (void __user *)(unsigned long)
				209	(bndcsr->bndcfgu & MPX_BNDCFG_ADDR_MASK);
				210	}
				211
Dave Hansen	46a6e0c	2015-06-07 11:37:02 -0700	[diff] [blame]	212	int mpx_enable_management(void)
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	213	{
				214	void __user *bd_base = MPX_INVALID_BOUNDS_DIR;
Dave Hansen	46a6e0c	2015-06-07 11:37:02 -0700	[diff] [blame]	215	struct mm_struct *mm = current->mm;
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	216	int ret = 0;
				217
				218	/*
				219	* runtime in the userspace will be responsible for allocation of
				220	* the bounds directory. Then, it will save the base of the bounds
				221	* directory into XSAVE/XRSTOR Save Area and enable MPX through
				222	* XRSTOR instruction.
				223	*
Dave Hansen	a84eeaa	2015-06-07 11:37:01 -0700	[diff] [blame]	224	* The copy_xregs_to_kernel() beneath get_xsave_field_ptr() is
				225	* expected to be relatively expensive. Storing the bounds
				226	* directory here means that we do not have to do xsave in the
Mark Rutland	cb02de9	2016-12-16 12:40:55 +0000	[diff] [blame]	227	* unmap path; we can just use mm->context.bd_addr instead.
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	228	*/
Dave Hansen	46a6e0c	2015-06-07 11:37:02 -0700	[diff] [blame]	229	bd_base = mpx_get_bounds_dir();
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	230	down_write(&mm->mmap_sem);
Kirill A. Shutemov	44b0491	2017-07-17 01:59:51 +0300	[diff] [blame]	231
				232	/* MPX doesn't support addresses above 47 bits yet. */
				233	if (find_vma(mm, DEFAULT_MAP_WINDOW)) {
				234	pr_warn_once("%s (%d): MPX cannot handle addresses "
				235	"above 47-bits. Disabling.",
				236	current->comm, current->pid);
				237	ret = -ENXIO;
				238	goto out;
				239	}
Mark Rutland	cb02de9	2016-12-16 12:40:55 +0000	[diff] [blame]	240	mm->context.bd_addr = bd_base;
				241	if (mm->context.bd_addr == MPX_INVALID_BOUNDS_DIR)
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	242	ret = -ENXIO;
Kirill A. Shutemov	44b0491	2017-07-17 01:59:51 +0300	[diff] [blame]	243	out:
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	244	up_write(&mm->mmap_sem);
				245	return ret;
				246	}
				247
Dave Hansen	46a6e0c	2015-06-07 11:37:02 -0700	[diff] [blame]	248	int mpx_disable_management(void)
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	249	{
				250	struct mm_struct *mm = current->mm;
				251
				252	if (!cpu_feature_enabled(X86_FEATURE_MPX))
				253	return -ENXIO;
				254
				255	down_write(&mm->mmap_sem);
Mark Rutland	cb02de9	2016-12-16 12:40:55 +0000	[diff] [blame]	256	mm->context.bd_addr = MPX_INVALID_BOUNDS_DIR;
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	257	up_write(&mm->mmap_sem);
				258	return 0;
				259	}
				260
Dave Hansen	6ac52bb	2015-06-07 11:37:05 -0700	[diff] [blame]	261	static int mpx_cmpxchg_bd_entry(struct mm_struct *mm,
				262	unsigned long *curval,
				263	unsigned long __user *addr,
				264	unsigned long old_val, unsigned long new_val)
				265	{
				266	int ret;
				267	/*
				268	* user_atomic_cmpxchg_inatomic() actually uses sizeof()
				269	* the pointer that we pass to it to figure out how much
				270	* data to cmpxchg. We have to be careful here not to
				271	* pass a pointer to a 64-bit data type when we only want
				272	* a 32-bit copy.
				273	*/
				274	if (is_64bit_mm(mm)) {
				275	ret = user_atomic_cmpxchg_inatomic(curval,
				276	addr, old_val, new_val);
				277	} else {
				278	u32 uninitialized_var(curval_32);
				279	u32 old_val_32 = old_val;
				280	u32 new_val_32 = new_val;
				281	u32 __user addr_32 = (u32 __user )addr;
				282
				283	ret = user_atomic_cmpxchg_inatomic(&curval_32,
				284	addr_32, old_val_32, new_val_32);
				285	*curval = curval_32;
				286	}
				287	return ret;
				288	}
				289
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	290	/*
Dave Hansen	613fcb7	2015-06-07 11:37:05 -0700	[diff] [blame]	291	* With 32-bit mode, a bounds directory is 4MB, and the size of each
				292	* bounds table is 16KB. With 64-bit mode, a bounds directory is 2GB,
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	293	* and the size of each bounds table is 4MB.
				294	*/
Dave Hansen	613fcb7	2015-06-07 11:37:05 -0700	[diff] [blame]	295	static int allocate_bt(struct mm_struct mm, long __user bd_entry)
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	296	{
				297	unsigned long expected_old_val = 0;
				298	unsigned long actual_old_val = 0;
				299	unsigned long bt_addr;
Dave Hansen	a1149fc	2015-06-07 11:37:04 -0700	[diff] [blame]	300	unsigned long bd_new_entry;
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	301	int ret = 0;
				302
				303	/*
				304	* Carve the virtual space out of userspace for the new
				305	* bounds table:
				306	*/
Dave Hansen	613fcb7	2015-06-07 11:37:05 -0700	[diff] [blame]	307	bt_addr = mpx_mmap(mpx_bt_size_bytes(mm));
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	308	if (IS_ERR((void *)bt_addr))
				309	return PTR_ERR((void *)bt_addr);
				310	/*
				311	* Set the valid flag (kinda like _PAGE_PRESENT in a pte)
				312	*/
Dave Hansen	a1149fc	2015-06-07 11:37:04 -0700	[diff] [blame]	313	bd_new_entry = bt_addr \| MPX_BD_ENTRY_VALID_FLAG;
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	314
				315	/*
				316	* Go poke the address of the new bounds table in to the
				317	* bounds directory entry out in userspace memory. Note:
				318	* we may race with another CPU instantiating the same table.
				319	* In that case the cmpxchg will see an unexpected
				320	* 'actual_old_val'.
				321	*
				322	* This can fault, but that's OK because we do not hold
				323	* mmap_sem at this point, unlike some of the other part
				324	* of the MPX code that have to pagefault_disable().
				325	*/
Dave Hansen	6ac52bb	2015-06-07 11:37:05 -0700	[diff] [blame]	326	ret = mpx_cmpxchg_bd_entry(mm, &actual_old_val, bd_entry,
				327	expected_old_val, bd_new_entry);
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	328	if (ret)
				329	goto out_unmap;
				330
				331	/*
				332	* The user_atomic_cmpxchg_inatomic() will only return nonzero
				333	* for faults, not if the cmpxchg itself fails. Now we must
				334	* verify that the cmpxchg itself completed successfully.
				335	*/
				336	/*
				337	* We expected an empty 'expected_old_val', but instead found
				338	* an apparently valid entry. Assume we raced with another
				339	* thread to instantiate this table and desclare succecss.
				340	*/
				341	if (actual_old_val & MPX_BD_ENTRY_VALID_FLAG) {
				342	ret = 0;
				343	goto out_unmap;
				344	}
				345	/*
				346	* We found a non-empty bd_entry but it did not have the
				347	* VALID_FLAG set. Return an error which will result in
				348	* a SEGV since this probably means that somebody scribbled
				349	* some invalid data in to a bounds table.
				350	*/
				351	if (expected_old_val != actual_old_val) {
				352	ret = -EINVAL;
				353	goto out_unmap;
				354	}
Dave Hansen	cd4996d	2015-06-07 11:37:04 -0700	[diff] [blame]	355	trace_mpx_new_bounds_table(bt_addr);
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	356	return 0;
				357	out_unmap:
Dave Hansen	613fcb7	2015-06-07 11:37:05 -0700	[diff] [blame]	358	vm_munmap(bt_addr, mpx_bt_size_bytes(mm));
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	359	return ret;
				360	}
				361
				362	/*
				363	* When a BNDSTX instruction attempts to save bounds to a bounds
				364	* table, it will first attempt to look up the table in the
				365	* first-level bounds directory. If it does not find a table in
				366	* the directory, a #BR is generated and we get here in order to
				367	* allocate a new table.
				368	*
				369	* With 32-bit mode, the size of BD is 4MB, and the size of each
				370	* bound table is 16KB. With 64-bit mode, the size of BD is 2GB,
				371	* and the size of each bound table is 4MB.
				372	*/
Dave Hansen	46a6e0c	2015-06-07 11:37:02 -0700	[diff] [blame]	373	static int do_mpx_bt_fault(void)
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	374	{
				375	unsigned long bd_entry, bd_base;
Dave Hansen	1126cb45	2015-09-02 16:31:29 -0700	[diff] [blame]	376	const struct mpx_bndcsr *bndcsr;
Dave Hansen	613fcb7	2015-06-07 11:37:05 -0700	[diff] [blame]	377	struct mm_struct *mm = current->mm;
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	378
Sebastian Andrzej Siewior	abd16d6	2019-04-03 18:41:40 +0200	[diff] [blame]	379	bndcsr = get_xsave_field_ptr(XFEATURE_BNDCSR);
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	380	if (!bndcsr)
				381	return -EINVAL;
				382	/*
				383	* Mask off the preserve and enable bits
				384	*/
				385	bd_base = bndcsr->bndcfgu & MPX_BNDCFG_ADDR_MASK;
				386	/*
				387	* The hardware provides the address of the missing or invalid
				388	* entry via BNDSTATUS, so we don't have to go look it up.
				389	*/
				390	bd_entry = bndcsr->bndstatus & MPX_BNDSTA_ADDR_MASK;
				391	/*
				392	* Make sure the directory entry is within where we think
				393	* the directory is.
				394	*/
				395	if ((bd_entry < bd_base) \|\|
Dave Hansen	613fcb7	2015-06-07 11:37:05 -0700	[diff] [blame]	396	(bd_entry >= bd_base + mpx_bd_size_bytes(mm)))
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	397	return -EINVAL;
				398
Dave Hansen	613fcb7	2015-06-07 11:37:05 -0700	[diff] [blame]	399	return allocate_bt(mm, (long __user *)bd_entry);
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	400	}
				401
Dave Hansen	46a6e0c	2015-06-07 11:37:02 -0700	[diff] [blame]	402	int mpx_handle_bd_fault(void)
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	403	{
				404	/*
				405	* Userspace never asked us to manage the bounds tables,
				406	* so refuse to help.
				407	*/
				408	if (!kernel_managing_mpx_tables(current->mm))
				409	return -EINVAL;
				410
Joerg Roedel	5ed386e	2017-04-06 16:19:22 +0200	[diff] [blame]	411	return do_mpx_bt_fault();
Dave Hansen	fe3d197	2014-11-14 07:18:29 -0800	[diff] [blame]	412	}
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	413
				414	/*
				415	* A thin wrapper around get_user_pages(). Returns 0 if the
				416	* fault was resolved or -errno if not.
				417	*/
				418	static int mpx_resolve_fault(long __user *addr, int write)
				419	{
				420	long gup_ret;
				421	int nr_pages = 1;
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	422
Lorenzo Stoakes	768ae30	2016-10-13 01:20:16 +0100	[diff] [blame]	423	gup_ret = get_user_pages((unsigned long)addr, nr_pages,
				424	write ? FOLL_WRITE : 0, NULL, NULL);
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	425	/*
				426	* get_user_pages() returns number of pages gotten.
				427	* 0 means we failed to fault in and get anything,
				428	* probably because 'addr' is bad.
				429	*/
				430	if (!gup_ret)
				431	return -EFAULT;
				432	/* Other error, return it */
				433	if (gup_ret < 0)
				434	return gup_ret;
				435	/* must have gup'd a page and gup_ret>0, success */
				436	return 0;
				437	}
				438
Dave Hansen	5458765	2015-06-07 11:37:04 -0700	[diff] [blame]	439	static unsigned long mpx_bd_entry_to_bt_addr(struct mm_struct *mm,
				440	unsigned long bd_entry)
				441	{
				442	unsigned long bt_addr = bd_entry;
				443	int align_to_bytes;
				444	/*
				445	* Bit 0 in a bt_entry is always the valid bit.
				446	*/
				447	bt_addr &= ~MPX_BD_ENTRY_VALID_FLAG;
				448	/*
				449	* Tables are naturally aligned at 8-byte boundaries
				450	* on 64-bit and 4-byte boundaries on 32-bit. The
				451	* documentation makes it appear that the low bits
				452	* are ignored by the hardware, so we do the same.
				453	*/
				454	if (is_64bit_mm(mm))
				455	align_to_bytes = 8;
				456	else
				457	align_to_bytes = 4;
				458	bt_addr &= ~(align_to_bytes-1);
				459	return bt_addr;
				460	}
				461
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	462	/*
Dave Hansen	46561c3	2015-11-11 10:19:31 -0800	[diff] [blame]	463	* We only want to do a 4-byte get_user() on 32-bit. Otherwise,
				464	* we might run off the end of the bounds table if we are on
				465	* a 64-bit kernel and try to get 8 bytes.
				466	*/
Tobias Klauser	6bce725	2017-03-08 14:30:34 +0100	[diff] [blame]	467	static int get_user_bd_entry(struct mm_struct mm, unsigned long bd_entry_ret,
Dave Hansen	46561c3	2015-11-11 10:19:31 -0800	[diff] [blame]	468	long __user *bd_entry_ptr)
				469	{
				470	u32 bd_entry_32;
				471	int ret;
				472
				473	if (is_64bit_mm(mm))
				474	return get_user(*bd_entry_ret, bd_entry_ptr);
				475
				476	/*
				477	* Note that get_user() uses the type of the pointer to
				478	* establish the size of the get, not the destination.
				479	*/
				480	ret = get_user(bd_entry_32, (u32 __user *)bd_entry_ptr);
				481	*bd_entry_ret = bd_entry_32;
				482	return ret;
				483	}
				484
				485	/*
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	486	* Get the base of bounds tables pointed by specific bounds
				487	* directory entry.
				488	*/
				489	static int get_bt_addr(struct mm_struct *mm,
Dave Hansen	5458765	2015-06-07 11:37:04 -0700	[diff] [blame]	490	long __user *bd_entry_ptr,
				491	unsigned long *bt_addr_result)
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	492	{
				493	int ret;
				494	int valid_bit;
Dave Hansen	5458765	2015-06-07 11:37:04 -0700	[diff] [blame]	495	unsigned long bd_entry;
				496	unsigned long bt_addr;
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	497
Linus Torvalds	96d4f26	2019-01-03 18:57:57 -0800	[diff] [blame]	498	if (!access_ok((bd_entry_ptr), sizeof(*bd_entry_ptr)))
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	499	return -EFAULT;
				500
				501	while (1) {
				502	int need_write = 0;
				503
				504	pagefault_disable();
Dave Hansen	46561c3	2015-11-11 10:19:31 -0800	[diff] [blame]	505	ret = get_user_bd_entry(mm, &bd_entry, bd_entry_ptr);
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	506	pagefault_enable();
				507	if (!ret)
				508	break;
				509	if (ret == -EFAULT)
Dave Hansen	5458765	2015-06-07 11:37:04 -0700	[diff] [blame]	510	ret = mpx_resolve_fault(bd_entry_ptr, need_write);
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	511	/*
				512	* If we could not resolve the fault, consider it
				513	* userspace's fault and error out.
				514	*/
				515	if (ret)
				516	return ret;
				517	}
				518
Dave Hansen	5458765	2015-06-07 11:37:04 -0700	[diff] [blame]	519	valid_bit = bd_entry & MPX_BD_ENTRY_VALID_FLAG;
				520	bt_addr = mpx_bd_entry_to_bt_addr(mm, bd_entry);
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	521
				522	/*
				523	* When the kernel is managing bounds tables, a bounds directory
				524	* entry will either have a valid address (plus the valid bit)
				525	* OR be completely empty. If we see a !valid entry and some
				526	* data in the address field, we know something is wrong. This
				527	* -EINVAL return will cause a SIGSEGV.
				528	*/
Dave Hansen	5458765	2015-06-07 11:37:04 -0700	[diff] [blame]	529	if (!valid_bit && bt_addr)
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	530	return -EINVAL;
				531	/*
				532	* Do we have an completely zeroed bt entry? That is OK. It
				533	* just means there was no bounds table for this memory. Make
				534	* sure to distinguish this from -EINVAL, which will cause
				535	* a SEGV.
				536	*/
				537	if (!valid_bit)
				538	return -ENOENT;
				539
Dave Hansen	5458765	2015-06-07 11:37:04 -0700	[diff] [blame]	540	*bt_addr_result = bt_addr;
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	541	return 0;
				542	}
				543
Dave Hansen	613fcb7	2015-06-07 11:37:05 -0700	[diff] [blame]	544	static inline int bt_entry_size_bytes(struct mm_struct *mm)
				545	{
				546	if (is_64bit_mm(mm))
				547	return MPX_BT_ENTRY_BYTES_64;
				548	else
				549	return MPX_BT_ENTRY_BYTES_32;
				550	}
				551
				552	/*
				553	* Take a virtual address and turns it in to the offset in bytes
				554	* inside of the bounds table where the bounds table entry
				555	* controlling 'addr' can be found.
				556	*/
				557	static unsigned long mpx_get_bt_entry_offset_bytes(struct mm_struct *mm,
				558	unsigned long addr)
				559	{
				560	unsigned long bt_table_nr_entries;
				561	unsigned long offset = addr;
				562
				563	if (is_64bit_mm(mm)) {
				564	/* Bottom 3 bits are ignored on 64-bit */
				565	offset >>= 3;
				566	bt_table_nr_entries = MPX_BT_NR_ENTRIES_64;
				567	} else {
				568	/* Bottom 2 bits are ignored on 32-bit */
				569	offset >>= 2;
				570	bt_table_nr_entries = MPX_BT_NR_ENTRIES_32;
				571	}
				572	/*
				573	* We know the size of the table in to which we are
				574	* indexing, and we have eliminated all the low bits
				575	* which are ignored for indexing.
				576	*
				577	* Mask out all the high bits which we do not need
				578	* to index in to the table. Note that the tables
				579	* are always powers of two so this gives us a proper
				580	* mask.
				581	*/
				582	offset &= (bt_table_nr_entries-1);
				583	/*
				584	* We now have an entry offset in terms of entries in
				585	* the table. We need to scale it back up to bytes.
				586	*/
				587	offset *= bt_entry_size_bytes(mm);
				588	return offset;
				589	}
				590
				591	/*
				592	* How much virtual address space does a single bounds
				593	* directory entry cover?
				594	*
				595	* Note, we need a long long because 4GB doesn't fit in
				596	* to a long on 32-bit.
				597	*/
				598	static inline unsigned long bd_entry_virt_space(struct mm_struct *mm)
				599	{
Dave Hansen	f3119b8	2015-11-11 10:19:34 -0800	[diff] [blame]	600	unsigned long long virt_space;
				601	unsigned long long GB = (1ULL << 30);
				602
				603	/*
				604	* This covers 32-bit emulation as well as 32-bit kernels
Adam Buchbinder	6a6256f	2016-02-23 15:34:30 -0800	[diff] [blame]	605	* running on 64-bit hardware.
Dave Hansen	f3119b8	2015-11-11 10:19:34 -0800	[diff] [blame]	606	*/
				607	if (!is_64bit_mm(mm))
				608	return (4ULL * GB) / MPX_BD_NR_ENTRIES_32;
				609
				610	/*
				611	* 'x86_virt_bits' returns what the hardware is capable
Adam Buchbinder	6a6256f	2016-02-23 15:34:30 -0800	[diff] [blame]	612	* of, and returns the full >32-bit address space when
Dave Hansen	f3119b8	2015-11-11 10:19:34 -0800	[diff] [blame]	613	* running 32-bit kernels on 64-bit hardware.
				614	*/
				615	virt_space = (1ULL << boot_cpu_data.x86_virt_bits);
				616	return virt_space / MPX_BD_NR_ENTRIES_64;
Dave Hansen	613fcb7	2015-06-07 11:37:05 -0700	[diff] [blame]	617	}
				618
				619	/*
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	620	* Free the backing physical pages of bounds table 'bt_addr'.
				621	* Assume start...end is within that bounds table.
Dave Hansen	613fcb7	2015-06-07 11:37:05 -0700	[diff] [blame]	622	*/
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	623	static noinline int zap_bt_entries_mapping(struct mm_struct *mm,
				624	unsigned long bt_addr,
				625	unsigned long start_mapping, unsigned long end_mapping)
				626	{
				627	struct vm_area_struct *vma;
				628	unsigned long addr, len;
				629	unsigned long start;
				630	unsigned long end;
				631
				632	/*
				633	* if we 'end' on a boundary, the offset will be 0 which
				634	* is not what we want. Back it up a byte to get the
				635	* last bt entry. Then once we have the entry itself,
				636	* move 'end' back up by the table entry size.
				637	*/
				638	start = bt_addr + mpx_get_bt_entry_offset_bytes(mm, start_mapping);
				639	end = bt_addr + mpx_get_bt_entry_offset_bytes(mm, end_mapping - 1);
				640	/*
				641	* Move end back up by one entry. Among other things
				642	* this ensures that it remains page-aligned and does
				643	* not screw up zap_page_range()
				644	*/
				645	end += bt_entry_size_bytes(mm);
				646
				647	/*
				648	* Find the first overlapping vma. If vma->vm_start > start, there
				649	* will be a hole in the bounds table. This -EINVAL return will
				650	* cause a SIGSEGV.
				651	*/
				652	vma = find_vma(mm, start);
				653	if (!vma \|\| vma->vm_start > start)
				654	return -EINVAL;
				655
				656	/*
				657	* A NUMA policy on a VM_MPX VMA could cause this bounds table to
				658	* be split. So we need to look across the entire 'start -> end'
				659	* range of this bounds table, find all of the VM_MPX VMAs, and
				660	* zap only those.
				661	*/
				662	addr = start;
				663	while (vma && vma->vm_start < end) {
				664	/*
				665	* We followed a bounds directory entry down
				666	* here. If we find a non-MPX VMA, that's bad,
				667	* so stop immediately and return an error. This
				668	* probably results in a SIGSEGV.
				669	*/
Kirill A. Shutemov	a8965276	2015-07-20 14:29:58 -0700	[diff] [blame]	670	if (!(vma->vm_flags & VM_MPX))
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	671	return -EINVAL;
				672
				673	len = min(vma->vm_end, end) - addr;
Kirill A. Shutemov	ecf1385	2017-02-22 15:46:37 -0800	[diff] [blame]	674	zap_page_range(vma, addr, len);
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	675	trace_mpx_unmap_zap(addr, addr+len);
				676
				677	vma = vma->vm_next;
				678	addr = vma->vm_start;
				679	}
				680	return 0;
				681	}
				682
Dave Hansen	613fcb7	2015-06-07 11:37:05 -0700	[diff] [blame]	683	static unsigned long mpx_get_bd_entry_offset(struct mm_struct *mm,
				684	unsigned long addr)
				685	{
				686	/*
				687	* There are several ways to derive the bd offsets. We
				688	* use the following approach here:
				689	* 1. We know the size of the virtual address space
				690	* 2. We know the number of entries in a bounds table
				691	* 3. We know that each entry covers a fixed amount of
				692	* virtual address space.
				693	* So, we can just divide the virtual address by the
				694	* virtual space used by one entry to determine which
				695	* entry "controls" the given virtual address.
				696	*/
				697	if (is_64bit_mm(mm)) {
				698	int bd_entry_size = 8; /* 64-bit pointer */
				699	/*
				700	* Take the 64-bit addressing hole in to account.
				701	*/
				702	addr &= ((1UL << boot_cpu_data.x86_virt_bits) - 1);
				703	return (addr / bd_entry_virt_space(mm)) * bd_entry_size;
				704	} else {
				705	int bd_entry_size = 4; /* 32-bit pointer */
				706	/*
				707	* 32-bit has no hole so this case needs no mask
				708	*/
				709	return (addr / bd_entry_virt_space(mm)) * bd_entry_size;
				710	}
				711	/*
				712	* The two return calls above are exact copies. If we
				713	* pull out a single copy and put it in here, gcc won't
				714	* realize that we're doing a power-of-2 divide and use
				715	* shifts. It uses a real divide. If we put them up
				716	* there, it manages to figure it out (gcc 4.8.3).
				717	*/
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	718	}
				719
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	720	static int unmap_entire_bt(struct mm_struct *mm,
				721	long __user *bd_entry, unsigned long bt_addr)
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	722	{
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	723	unsigned long expected_old_val = bt_addr \| MPX_BD_ENTRY_VALID_FLAG;
				724	unsigned long uninitialized_var(actual_old_val);
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	725	int ret;
				726
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	727	while (1) {
				728	int need_write = 1;
				729	unsigned long cleared_bd_entry = 0;
				730
				731	pagefault_disable();
				732	ret = mpx_cmpxchg_bd_entry(mm, &actual_old_val,
				733	bd_entry, expected_old_val, cleared_bd_entry);
				734	pagefault_enable();
				735	if (!ret)
				736	break;
				737	if (ret == -EFAULT)
				738	ret = mpx_resolve_fault(bd_entry, need_write);
				739	/*
				740	* If we could not resolve the fault, consider it
				741	* userspace's fault and error out.
				742	*/
				743	if (ret)
				744	return ret;
				745	}
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	746	/*
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	747	* The cmpxchg was performed, check the results.
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	748	*/
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	749	if (actual_old_val != expected_old_val) {
				750	/*
				751	* Someone else raced with us to unmap the table.
				752	* That is OK, since we were both trying to do
				753	* the same thing. Declare success.
				754	*/
				755	if (!actual_old_val)
				756	return 0;
				757	/*
				758	* Something messed with the bounds directory
				759	* entry. We hold mmap_sem for read or write
				760	* here, so it could not be a _new_ bounds table
				761	* that someone just allocated. Something is
				762	* wrong, so pass up the error and SIGSEGV.
				763	*/
				764	return -EINVAL;
				765	}
				766	/*
				767	* Note, we are likely being called under do_munmap() already. To
				768	* avoid recursion, do_munmap() will check whether it comes
				769	* from one bounds table through VM_MPX flag.
				770	*/
Mike Rapoport	897ab3e	2017-02-24 14:58:22 -0800	[diff] [blame]	771	return do_munmap(mm, bt_addr, mpx_bt_size_bytes(mm), NULL);
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	772	}
				773
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	774	static int try_unmap_single_bt(struct mm_struct *mm,
				775	unsigned long start, unsigned long end)
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	776	{
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	777	struct vm_area_struct *next;
				778	struct vm_area_struct *prev;
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	779	/*
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	780	* "bta" == Bounds Table Area: the area controlled by the
				781	* bounds table that we are unmapping.
				782	*/
				783	unsigned long bta_start_vaddr = start & ~(bd_entry_virt_space(mm)-1);
				784	unsigned long bta_end_vaddr = bta_start_vaddr + bd_entry_virt_space(mm);
				785	unsigned long uninitialized_var(bt_addr);
				786	void __user *bde_vaddr;
				787	int ret;
				788	/*
Dave Hansen	bea03c5	2015-06-07 11:37:06 -0700	[diff] [blame]	789	* We already unlinked the VMAs from the mm's rbtree so 'start'
				790	* is guaranteed to be in a hole. This gets us the first VMA
				791	* before the hole in to 'prev' and the next VMA after the hole
				792	* in to 'next'.
				793	*/
				794	next = find_vma_prev(mm, start, &prev);
				795	/*
				796	* Do not count other MPX bounds table VMAs as neighbors.
				797	* Although theoretically possible, we do not allow bounds
				798	* tables for bounds tables so our heads do not explode.
				799	* If we count them as neighbors here, we may end up with
				800	* lots of tables even though we have no actual table
				801	* entries in use.
				802	*/
Kirill A. Shutemov	a8965276	2015-07-20 14:29:58 -0700	[diff] [blame]	803	while (next && (next->vm_flags & VM_MPX))
Dave Hansen	bea03c5	2015-06-07 11:37:06 -0700	[diff] [blame]	804	next = next->vm_next;
Kirill A. Shutemov	a8965276	2015-07-20 14:29:58 -0700	[diff] [blame]	805	while (prev && (prev->vm_flags & VM_MPX))
Dave Hansen	bea03c5	2015-06-07 11:37:06 -0700	[diff] [blame]	806	prev = prev->vm_prev;
				807	/*
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	808	* We know 'start' and 'end' lie within an area controlled
				809	* by a single bounds table. See if there are any other
				810	* VMAs controlled by that bounds table. If there are not
				811	* then we can "expand" the are we are unmapping to possibly
				812	* cover the entire table.
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	813	*/
				814	next = find_vma_prev(mm, start, &prev);
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	815	if ((!prev \|\| prev->vm_end <= bta_start_vaddr) &&
				816	(!next \|\| next->vm_start >= bta_end_vaddr)) {
				817	/*
				818	* No neighbor VMAs controlled by same bounds
				819	* table. Try to unmap the whole thing
				820	*/
				821	start = bta_start_vaddr;
				822	end = bta_end_vaddr;
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	823	}
				824
Mark Rutland	cb02de9	2016-12-16 12:40:55 +0000	[diff] [blame]	825	bde_vaddr = mm->context.bd_addr + mpx_get_bd_entry_offset(mm, start);
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	826	ret = get_bt_addr(mm, bde_vaddr, &bt_addr);
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	827	/*
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	828	* No bounds table there, so nothing to unmap.
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	829	*/
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	830	if (ret == -ENOENT) {
				831	ret = 0;
				832	return 0;
				833	}
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	834	if (ret)
				835	return ret;
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	836	/*
				837	* We are unmapping an entire table. Either because the
				838	* unmap that started this whole process was large enough
				839	* to cover an entire table, or that the unmap was small
				840	* but was the area covered by a bounds table.
				841	*/
				842	if ((start == bta_start_vaddr) &&
				843	(end == bta_end_vaddr))
				844	return unmap_entire_bt(mm, bde_vaddr, bt_addr);
				845	return zap_bt_entries_mapping(mm, bt_addr, start, end);
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	846	}
				847
				848	static int mpx_unmap_tables(struct mm_struct *mm,
				849	unsigned long start, unsigned long end)
				850	{
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	851	unsigned long one_unmap_start;
Dave Hansen	2a1dcb1	2015-06-07 11:37:03 -0700	[diff] [blame]	852	trace_mpx_unmap_search(start, end);
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	853
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	854	one_unmap_start = start;
				855	while (one_unmap_start < end) {
				856	int ret;
				857	unsigned long next_unmap_start = ALIGN(one_unmap_start+1,
				858	bd_entry_virt_space(mm));
				859	unsigned long one_unmap_end = end;
				860	/*
				861	* if the end is beyond the current bounds table,
				862	* move it back so we only deal with a single one
				863	* at a time
				864	*/
				865	if (one_unmap_end > next_unmap_start)
				866	one_unmap_end = next_unmap_start;
				867	ret = try_unmap_single_bt(mm, one_unmap_start, one_unmap_end);
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	868	if (ret)
				869	return ret;
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	870
Dave Hansen	3ceaccd	2015-06-07 11:37:06 -0700	[diff] [blame]	871	one_unmap_start = next_unmap_start;
				872	}
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	873	return 0;
				874	}
				875
				876	/*
				877	* Free unused bounds tables covered in a virtual address region being
				878	* munmap()ed. Assume end > start.
				879	*
				880	* This function will be called by do_munmap(), and the VMAs covering
				881	* the virtual address region start...end have already been split if
				882	* necessary, and the 'vma' is the first vma in this range (start -> end).
				883	*/
Dave Hansen	5a28fc9	2019-04-19 12:47:47 -0700	[diff] [blame]	884	void mpx_notify_unmap(struct mm_struct *mm, unsigned long start,
				885	unsigned long end)
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	886	{
Dave Hansen	5a28fc9	2019-04-19 12:47:47 -0700	[diff] [blame]	887	struct vm_area_struct *vma;
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	888	int ret;
				889
				890	/*
				891	* Refuse to do anything unless userspace has asked
				892	* the kernel to help manage the bounds tables,
				893	*/
				894	if (!kernel_managing_mpx_tables(current->mm))
				895	return;
				896	/*
				897	* This will look across the entire 'start -> end' range,
				898	* and find all of the non-VM_MPX VMAs.
				899	*
				900	* To avoid recursion, if a VM_MPX vma is found in the range
				901	* (start->end), we will not continue follow-up work. This
				902	* recursion represents having bounds tables for bounds tables,
				903	* which should not occur normally. Being strict about it here
				904	* helps ensure that we do not have an exploitable stack overflow.
				905	*/
Dave Hansen	5a28fc9	2019-04-19 12:47:47 -0700	[diff] [blame]	906	vma = find_vma(mm, start);
				907	while (vma && vma->vm_start < end) {
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	908	if (vma->vm_flags & VM_MPX)
				909	return;
				910	vma = vma->vm_next;
Dave Hansen	5a28fc9	2019-04-19 12:47:47 -0700	[diff] [blame]	911	}
Dave Hansen	1de4fa1	2014-11-14 07:18:31 -0800	[diff] [blame]	912
				913	ret = mpx_unmap_tables(mm, start, end);
				914	if (ret)
				915	force_sig(SIGSEGV, current);
				916	}
Kirill A. Shutemov	44b0491	2017-07-17 01:59:51 +0300	[diff] [blame]	917
				918	/* MPX cannot handle addresses above 47 bits yet. */
				919	unsigned long mpx_unmapped_area_check(unsigned long addr, unsigned long len,
				920	unsigned long flags)
				921	{
				922	if (!kernel_managing_mpx_tables(current->mm))
				923	return addr;
				924	if (addr + len <= DEFAULT_MAP_WINDOW)
				925	return addr;
				926	if (flags & MAP_FIXED)
				927	return -ENOMEM;
				928
				929	/*
				930	* Requested len is larger than the whole area we're allowed to map in.
				931	* Resetting hinting address wouldn't do much good -- fail early.
				932	*/
				933	if (len > DEFAULT_MAP_WINDOW)
				934	return -ENOMEM;
				935
				936	/* Look for unmap area within DEFAULT_MAP_WINDOW */
				937	return 0;
				938	}