Blame - kernel/bpf/syscall.c - linux-4.4

blob: 0afb4eaa1887e71002e247789c1333adf203350c [file] [log] [blame]

Alexei Starovoitov	99c55f7	2014-09-26 00:16:57 -0700	[diff] [blame]	1	/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
				2	*
				3	* This program is free software; you can redistribute it and/or
				4	* modify it under the terms of version 2 of the GNU General Public
				5	* License as published by the Free Software Foundation.
				6	*
				7	* This program is distributed in the hope that it will be useful, but
				8	* WITHOUT ANY WARRANTY; without even the implied warranty of
				9	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				10	* General Public License for more details.
				11	*/
				12	#include <linux/bpf.h>
				13	#include <linux/syscalls.h>
				14	#include <linux/slab.h>
				15	#include <linux/anon_inodes.h>
Alexei Starovoitov	db20fd2	2014-09-26 00:16:59 -0700	[diff] [blame]	16	#include <linux/file.h>
Alexei Starovoitov	09756af	2014-09-26 00:17:00 -0700	[diff] [blame^]	17	#include <linux/license.h>
				18	#include <linux/filter.h>
Alexei Starovoitov	99c55f7	2014-09-26 00:16:57 -0700	[diff] [blame]	19
				20	static LIST_HEAD(bpf_map_types);
				21
				22	static struct bpf_map find_and_alloc_map(union bpf_attr attr)
				23	{
				24	struct bpf_map_type_list *tl;
				25	struct bpf_map *map;
				26
				27	list_for_each_entry(tl, &bpf_map_types, list_node) {
				28	if (tl->type == attr->map_type) {
				29	map = tl->ops->map_alloc(attr);
				30	if (IS_ERR(map))
				31	return map;
				32	map->ops = tl->ops;
				33	map->map_type = attr->map_type;
				34	return map;
				35	}
				36	}
				37	return ERR_PTR(-EINVAL);
				38	}
				39
				40	/* boot time registration of different map implementations */
				41	void bpf_register_map_type(struct bpf_map_type_list *tl)
				42	{
				43	list_add(&tl->list_node, &bpf_map_types);
				44	}
				45
				46	/* called from workqueue */
				47	static void bpf_map_free_deferred(struct work_struct *work)
				48	{
				49	struct bpf_map *map = container_of(work, struct bpf_map, work);
				50
				51	/* implementation dependent freeing */
				52	map->ops->map_free(map);
				53	}
				54
				55	/* decrement map refcnt and schedule it for freeing via workqueue
				56	* (unrelying map implementation ops->map_free() might sleep)
				57	*/
				58	void bpf_map_put(struct bpf_map *map)
				59	{
				60	if (atomic_dec_and_test(&map->refcnt)) {
				61	INIT_WORK(&map->work, bpf_map_free_deferred);
				62	schedule_work(&map->work);
				63	}
				64	}
				65
				66	static int bpf_map_release(struct inode inode, struct file filp)
				67	{
				68	struct bpf_map *map = filp->private_data;
				69
				70	bpf_map_put(map);
				71	return 0;
				72	}
				73
				74	static const struct file_operations bpf_map_fops = {
				75	.release = bpf_map_release,
				76	};
				77
				78	/* helper macro to check that unused fields 'union bpf_attr' are zero */
				79	#define CHECK_ATTR(CMD) \
				80	memchr_inv((void *) &attr->CMD##_LAST_FIELD + \
				81	sizeof(attr->CMD##_LAST_FIELD), 0, \
				82	sizeof(*attr) - \
				83	offsetof(union bpf_attr, CMD##_LAST_FIELD) - \
				84	sizeof(attr->CMD##_LAST_FIELD)) != NULL
				85
				86	#define BPF_MAP_CREATE_LAST_FIELD max_entries
				87	/* called via syscall */
				88	static int map_create(union bpf_attr *attr)
				89	{
				90	struct bpf_map *map;
				91	int err;
				92
				93	err = CHECK_ATTR(BPF_MAP_CREATE);
				94	if (err)
				95	return -EINVAL;
				96
				97	/* find map type and init map: hashtable vs rbtree vs bloom vs ... */
				98	map = find_and_alloc_map(attr);
				99	if (IS_ERR(map))
				100	return PTR_ERR(map);
				101
				102	atomic_set(&map->refcnt, 1);
				103
				104	err = anon_inode_getfd("bpf-map", &bpf_map_fops, map, O_RDWR \| O_CLOEXEC);
				105
				106	if (err < 0)
				107	/* failed to allocate fd */
				108	goto free_map;
				109
				110	return err;
				111
				112	free_map:
				113	map->ops->map_free(map);
				114	return err;
				115	}
				116
Alexei Starovoitov	db20fd2	2014-09-26 00:16:59 -0700	[diff] [blame]	117	/* if error is returned, fd is released.
				118	* On success caller should complete fd access with matching fdput()
				119	*/
				120	struct bpf_map *bpf_map_get(struct fd f)
				121	{
				122	struct bpf_map *map;
				123
				124	if (!f.file)
				125	return ERR_PTR(-EBADF);
				126
				127	if (f.file->f_op != &bpf_map_fops) {
				128	fdput(f);
				129	return ERR_PTR(-EINVAL);
				130	}
				131
				132	map = f.file->private_data;
				133
				134	return map;
				135	}
				136
				137	/* helper to convert user pointers passed inside __aligned_u64 fields */
				138	static void __user *u64_to_ptr(__u64 val)
				139	{
				140	return (void __user *) (unsigned long) val;
				141	}
				142
				143	/* last field in 'union bpf_attr' used by this command */
				144	#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
				145
				146	static int map_lookup_elem(union bpf_attr *attr)
				147	{
				148	void __user *ukey = u64_to_ptr(attr->key);
				149	void __user *uvalue = u64_to_ptr(attr->value);
				150	int ufd = attr->map_fd;
				151	struct fd f = fdget(ufd);
				152	struct bpf_map *map;
				153	void key, value;
				154	int err;
				155
				156	if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
				157	return -EINVAL;
				158
				159	map = bpf_map_get(f);
				160	if (IS_ERR(map))
				161	return PTR_ERR(map);
				162
				163	err = -ENOMEM;
				164	key = kmalloc(map->key_size, GFP_USER);
				165	if (!key)
				166	goto err_put;
				167
				168	err = -EFAULT;
				169	if (copy_from_user(key, ukey, map->key_size) != 0)
				170	goto free_key;
				171
				172	err = -ESRCH;
				173	rcu_read_lock();
				174	value = map->ops->map_lookup_elem(map, key);
				175	if (!value)
				176	goto err_unlock;
				177
				178	err = -EFAULT;
				179	if (copy_to_user(uvalue, value, map->value_size) != 0)
				180	goto err_unlock;
				181
				182	err = 0;
				183
				184	err_unlock:
				185	rcu_read_unlock();
				186	free_key:
				187	kfree(key);
				188	err_put:
				189	fdput(f);
				190	return err;
				191	}
				192
				193	#define BPF_MAP_UPDATE_ELEM_LAST_FIELD value
				194
				195	static int map_update_elem(union bpf_attr *attr)
				196	{
				197	void __user *ukey = u64_to_ptr(attr->key);
				198	void __user *uvalue = u64_to_ptr(attr->value);
				199	int ufd = attr->map_fd;
				200	struct fd f = fdget(ufd);
				201	struct bpf_map *map;
				202	void key, value;
				203	int err;
				204
				205	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
				206	return -EINVAL;
				207
				208	map = bpf_map_get(f);
				209	if (IS_ERR(map))
				210	return PTR_ERR(map);
				211
				212	err = -ENOMEM;
				213	key = kmalloc(map->key_size, GFP_USER);
				214	if (!key)
				215	goto err_put;
				216
				217	err = -EFAULT;
				218	if (copy_from_user(key, ukey, map->key_size) != 0)
				219	goto free_key;
				220
				221	err = -ENOMEM;
				222	value = kmalloc(map->value_size, GFP_USER);
				223	if (!value)
				224	goto free_key;
				225
				226	err = -EFAULT;
				227	if (copy_from_user(value, uvalue, map->value_size) != 0)
				228	goto free_value;
				229
				230	/* eBPF program that use maps are running under rcu_read_lock(),
				231	* therefore all map accessors rely on this fact, so do the same here
				232	*/
				233	rcu_read_lock();
				234	err = map->ops->map_update_elem(map, key, value);
				235	rcu_read_unlock();
				236
				237	free_value:
				238	kfree(value);
				239	free_key:
				240	kfree(key);
				241	err_put:
				242	fdput(f);
				243	return err;
				244	}
				245
				246	#define BPF_MAP_DELETE_ELEM_LAST_FIELD key
				247
				248	static int map_delete_elem(union bpf_attr *attr)
				249	{
				250	void __user *ukey = u64_to_ptr(attr->key);
				251	int ufd = attr->map_fd;
				252	struct fd f = fdget(ufd);
				253	struct bpf_map *map;
				254	void *key;
				255	int err;
				256
				257	if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
				258	return -EINVAL;
				259
				260	map = bpf_map_get(f);
				261	if (IS_ERR(map))
				262	return PTR_ERR(map);
				263
				264	err = -ENOMEM;
				265	key = kmalloc(map->key_size, GFP_USER);
				266	if (!key)
				267	goto err_put;
				268
				269	err = -EFAULT;
				270	if (copy_from_user(key, ukey, map->key_size) != 0)
				271	goto free_key;
				272
				273	rcu_read_lock();
				274	err = map->ops->map_delete_elem(map, key);
				275	rcu_read_unlock();
				276
				277	free_key:
				278	kfree(key);
				279	err_put:
				280	fdput(f);
				281	return err;
				282	}
				283
				284	/* last field in 'union bpf_attr' used by this command */
				285	#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key
				286
				287	static int map_get_next_key(union bpf_attr *attr)
				288	{
				289	void __user *ukey = u64_to_ptr(attr->key);
				290	void __user *unext_key = u64_to_ptr(attr->next_key);
				291	int ufd = attr->map_fd;
				292	struct fd f = fdget(ufd);
				293	struct bpf_map *map;
				294	void key, next_key;
				295	int err;
				296
				297	if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
				298	return -EINVAL;
				299
				300	map = bpf_map_get(f);
				301	if (IS_ERR(map))
				302	return PTR_ERR(map);
				303
				304	err = -ENOMEM;
				305	key = kmalloc(map->key_size, GFP_USER);
				306	if (!key)
				307	goto err_put;
				308
				309	err = -EFAULT;
				310	if (copy_from_user(key, ukey, map->key_size) != 0)
				311	goto free_key;
				312
				313	err = -ENOMEM;
				314	next_key = kmalloc(map->key_size, GFP_USER);
				315	if (!next_key)
				316	goto free_key;
				317
				318	rcu_read_lock();
				319	err = map->ops->map_get_next_key(map, key, next_key);
				320	rcu_read_unlock();
				321	if (err)
				322	goto free_next_key;
				323
				324	err = -EFAULT;
				325	if (copy_to_user(unext_key, next_key, map->key_size) != 0)
				326	goto free_next_key;
				327
				328	err = 0;
				329
				330	free_next_key:
				331	kfree(next_key);
				332	free_key:
				333	kfree(key);
				334	err_put:
				335	fdput(f);
				336	return err;
				337	}
				338
Alexei Starovoitov	09756af	2014-09-26 00:17:00 -0700	[diff] [blame^]	339	static LIST_HEAD(bpf_prog_types);
				340
				341	static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
				342	{
				343	struct bpf_prog_type_list *tl;
				344
				345	list_for_each_entry(tl, &bpf_prog_types, list_node) {
				346	if (tl->type == type) {
				347	prog->aux->ops = tl->ops;
				348	prog->aux->prog_type = type;
				349	return 0;
				350	}
				351	}
				352	return -EINVAL;
				353	}
				354
				355	void bpf_register_prog_type(struct bpf_prog_type_list *tl)
				356	{
				357	list_add(&tl->list_node, &bpf_prog_types);
				358	}
				359
				360	/* drop refcnt on maps used by eBPF program and free auxilary data */
				361	static void free_used_maps(struct bpf_prog_aux *aux)
				362	{
				363	int i;
				364
				365	for (i = 0; i < aux->used_map_cnt; i++)
				366	bpf_map_put(aux->used_maps[i]);
				367
				368	kfree(aux->used_maps);
				369	}
				370
				371	void bpf_prog_put(struct bpf_prog *prog)
				372	{
				373	if (atomic_dec_and_test(&prog->aux->refcnt)) {
				374	free_used_maps(prog->aux);
				375	bpf_prog_free(prog);
				376	}
				377	}
				378
				379	static int bpf_prog_release(struct inode inode, struct file filp)
				380	{
				381	struct bpf_prog *prog = filp->private_data;
				382
				383	bpf_prog_put(prog);
				384	return 0;
				385	}
				386
				387	static const struct file_operations bpf_prog_fops = {
				388	.release = bpf_prog_release,
				389	};
				390
				391	static struct bpf_prog *get_prog(struct fd f)
				392	{
				393	struct bpf_prog *prog;
				394
				395	if (!f.file)
				396	return ERR_PTR(-EBADF);
				397
				398	if (f.file->f_op != &bpf_prog_fops) {
				399	fdput(f);
				400	return ERR_PTR(-EINVAL);
				401	}
				402
				403	prog = f.file->private_data;
				404
				405	return prog;
				406	}
				407
				408	/* called by sockets/tracing/seccomp before attaching program to an event
				409	* pairs with bpf_prog_put()
				410	*/
				411	struct bpf_prog *bpf_prog_get(u32 ufd)
				412	{
				413	struct fd f = fdget(ufd);
				414	struct bpf_prog *prog;
				415
				416	prog = get_prog(f);
				417
				418	if (IS_ERR(prog))
				419	return prog;
				420
				421	atomic_inc(&prog->aux->refcnt);
				422	fdput(f);
				423	return prog;
				424	}
				425
				426	/* last field in 'union bpf_attr' used by this command */
				427	#define BPF_PROG_LOAD_LAST_FIELD license
				428
				429	static int bpf_prog_load(union bpf_attr *attr)
				430	{
				431	enum bpf_prog_type type = attr->prog_type;
				432	struct bpf_prog *prog;
				433	int err;
				434	char license[128];
				435	bool is_gpl;
				436
				437	if (CHECK_ATTR(BPF_PROG_LOAD))
				438	return -EINVAL;
				439
				440	/* copy eBPF program license from user space */
				441	if (strncpy_from_user(license, u64_to_ptr(attr->license),
				442	sizeof(license) - 1) < 0)
				443	return -EFAULT;
				444	license[sizeof(license) - 1] = 0;
				445
				446	/* eBPF programs must be GPL compatible to use GPL-ed functions */
				447	is_gpl = license_is_gpl_compatible(license);
				448
				449	if (attr->insn_cnt >= BPF_MAXINSNS)
				450	return -EINVAL;
				451
				452	/* plain bpf_prog allocation */
				453	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
				454	if (!prog)
				455	return -ENOMEM;
				456
				457	prog->len = attr->insn_cnt;
				458
				459	err = -EFAULT;
				460	if (copy_from_user(prog->insns, u64_to_ptr(attr->insns),
				461	prog->len * sizeof(struct bpf_insn)) != 0)
				462	goto free_prog;
				463
				464	prog->orig_prog = NULL;
				465	prog->jited = false;
				466
				467	atomic_set(&prog->aux->refcnt, 1);
				468	prog->aux->is_gpl_compatible = is_gpl;
				469
				470	/* find program type: socket_filter vs tracing_filter */
				471	err = find_prog_type(type, prog);
				472	if (err < 0)
				473	goto free_prog;
				474
				475	/* run eBPF verifier */
				476	/* err = bpf_check(prog, tb); */
				477
				478	if (err < 0)
				479	goto free_used_maps;
				480
				481	/* eBPF program is ready to be JITed */
				482	bpf_prog_select_runtime(prog);
				483
				484	err = anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR \| O_CLOEXEC);
				485
				486	if (err < 0)
				487	/* failed to allocate fd */
				488	goto free_used_maps;
				489
				490	return err;
				491
				492	free_used_maps:
				493	free_used_maps(prog->aux);
				494	free_prog:
				495	bpf_prog_free(prog);
				496	return err;
				497	}
				498
Alexei Starovoitov	99c55f7	2014-09-26 00:16:57 -0700	[diff] [blame]	499	SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
				500	{
				501	union bpf_attr attr = {};
				502	int err;
				503
				504	/* the syscall is limited to root temporarily. This restriction will be
				505	* lifted when security audit is clean. Note that eBPF+tracing must have
				506	* this restriction, since it may pass kernel data to user space
				507	*/
				508	if (!capable(CAP_SYS_ADMIN))
				509	return -EPERM;
				510
				511	if (!access_ok(VERIFY_READ, uattr, 1))
				512	return -EFAULT;
				513
				514	if (size > PAGE_SIZE) /* silly large */
				515	return -E2BIG;
				516
				517	/* If we're handed a bigger struct than we know of,
				518	* ensure all the unknown bits are 0 - i.e. new
				519	* user-space does not rely on any kernel feature
				520	* extensions we dont know about yet.
				521	*/
				522	if (size > sizeof(attr)) {
				523	unsigned char __user *addr;
				524	unsigned char __user *end;
				525	unsigned char val;
				526
				527	addr = (void __user *)uattr + sizeof(attr);
				528	end = (void __user *)uattr + size;
				529
				530	for (; addr < end; addr++) {
				531	err = get_user(val, addr);
				532	if (err)
				533	return err;
				534	if (val)
				535	return -E2BIG;
				536	}
				537	size = sizeof(attr);
				538	}
				539
				540	/* copy attributes from user space, may be less than sizeof(bpf_attr) */
				541	if (copy_from_user(&attr, uattr, size) != 0)
				542	return -EFAULT;
				543
				544	switch (cmd) {
				545	case BPF_MAP_CREATE:
				546	err = map_create(&attr);
				547	break;
Alexei Starovoitov	db20fd2	2014-09-26 00:16:59 -0700	[diff] [blame]	548	case BPF_MAP_LOOKUP_ELEM:
				549	err = map_lookup_elem(&attr);
				550	break;
				551	case BPF_MAP_UPDATE_ELEM:
				552	err = map_update_elem(&attr);
				553	break;
				554	case BPF_MAP_DELETE_ELEM:
				555	err = map_delete_elem(&attr);
				556	break;
				557	case BPF_MAP_GET_NEXT_KEY:
				558	err = map_get_next_key(&attr);
				559	break;
Alexei Starovoitov	09756af	2014-09-26 00:17:00 -0700	[diff] [blame^]	560	case BPF_PROG_LOAD:
				561	err = bpf_prog_load(&attr);
				562	break;
Alexei Starovoitov	99c55f7	2014-09-26 00:16:57 -0700	[diff] [blame]	563	default:
				564	err = -EINVAL;
				565	break;
				566	}
				567
				568	return err;
				569	}