Blame - kernel/trace/bpf_trace.c - linux-4.4

blob: a71bdad638d57be26bd7ce63ab9c0929d237d5a1 [file] [log] [blame]

Alexei Starovoitov	2541517	2015-03-25 12:49:20 -0700	[diff] [blame]	1	/* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com
				2	*
				3	* This program is free software; you can redistribute it and/or
				4	* modify it under the terms of version 2 of the GNU General Public
				5	* License as published by the Free Software Foundation.
				6	*/
				7	#include <linux/kernel.h>
				8	#include <linux/types.h>
				9	#include <linux/slab.h>
				10	#include <linux/bpf.h>
				11	#include <linux/filter.h>
				12	#include <linux/uaccess.h>
Alexei Starovoitov	9c959c8	2015-03-25 12:49:22 -0700	[diff] [blame]	13	#include <linux/ctype.h>
Alexei Starovoitov	2541517	2015-03-25 12:49:20 -0700	[diff] [blame]	14	#include "trace.h"
				15
				16	static DEFINE_PER_CPU(int, bpf_prog_active);
				17
				18	/**
				19	* trace_call_bpf - invoke BPF program
				20	* @prog: BPF program
				21	* @ctx: opaque context pointer
				22	*
				23	* kprobe handlers execute BPF programs via this helper.
				24	* Can be used from static tracepoints in the future.
				25	*
				26	* Return: BPF programs always return an integer which is interpreted by
				27	* kprobe handler as:
				28	* 0 - return from kprobe (event is filtered out)
				29	* 1 - store kprobe event into ring buffer
				30	* Other values are reserved and currently alias to 1
				31	*/
				32	unsigned int trace_call_bpf(struct bpf_prog prog, void ctx)
				33	{
				34	unsigned int ret;
				35
				36	if (in_nmi()) /* not supported yet */
				37	return 1;
				38
				39	preempt_disable();
				40
				41	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
				42	/*
				43	* since some bpf program is already running on this cpu,
				44	* don't call into another bpf program (same or different)
				45	* and don't send kprobe event into ring-buffer,
				46	* so return zero here
				47	*/
				48	ret = 0;
				49	goto out;
				50	}
				51
				52	rcu_read_lock();
				53	ret = BPF_PROG_RUN(prog, ctx);
				54	rcu_read_unlock();
				55
				56	out:
				57	__this_cpu_dec(bpf_prog_active);
				58	preempt_enable();
				59
				60	return ret;
				61	}
				62	EXPORT_SYMBOL_GPL(trace_call_bpf);
				63
				64	static u64 bpf_probe_read(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
				65	{
				66	void dst = (void ) (long) r1;
				67	int size = (int) r2;
				68	void unsafe_ptr = (void ) (long) r3;
				69
				70	return probe_kernel_read(dst, unsafe_ptr, size);
				71	}
				72
				73	static const struct bpf_func_proto bpf_probe_read_proto = {
				74	.func = bpf_probe_read,
				75	.gpl_only = true,
				76	.ret_type = RET_INTEGER,
				77	.arg1_type = ARG_PTR_TO_STACK,
				78	.arg2_type = ARG_CONST_STACK_SIZE,
				79	.arg3_type = ARG_ANYTHING,
				80	};
				81
Alexei Starovoitov	9c959c8	2015-03-25 12:49:22 -0700	[diff] [blame]	82	/*
				83	* limited trace_printk()
Alexei Starovoitov	8d3b7dc	2015-08-28 15:56:23 -0700	[diff] [blame]	84	* only %d %u %x %ld %lu %lx %lld %llu %llx %p %s conversion specifiers allowed
Alexei Starovoitov	9c959c8	2015-03-25 12:49:22 -0700	[diff] [blame]	85	*/
				86	static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5)
				87	{
				88	char fmt = (char ) (long) r1;
Alexei Starovoitov	8d3b7dc	2015-08-28 15:56:23 -0700	[diff] [blame]	89	bool str_seen = false;
Alexei Starovoitov	9c959c8	2015-03-25 12:49:22 -0700	[diff] [blame]	90	int mod[3] = {};
				91	int fmt_cnt = 0;
Alexei Starovoitov	8d3b7dc	2015-08-28 15:56:23 -0700	[diff] [blame]	92	u64 unsafe_addr;
				93	char buf[64];
Alexei Starovoitov	9c959c8	2015-03-25 12:49:22 -0700	[diff] [blame]	94	int i;
				95
				96	/*
				97	* bpf_check()->check_func_arg()->check_stack_boundary()
				98	* guarantees that fmt points to bpf program stack,
				99	* fmt_size bytes of it were initialized and fmt_size > 0
				100	*/
				101	if (fmt[--fmt_size] != 0)
				102	return -EINVAL;
				103
				104	/* check format string for allowed specifiers */
				105	for (i = 0; i < fmt_size; i++) {
				106	if ((!isprint(fmt[i]) && !isspace(fmt[i])) \|\| !isascii(fmt[i]))
				107	return -EINVAL;
				108
				109	if (fmt[i] != '%')
				110	continue;
				111
				112	if (fmt_cnt >= 3)
				113	return -EINVAL;
				114
				115	/* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
				116	i++;
				117	if (fmt[i] == 'l') {
				118	mod[fmt_cnt]++;
				119	i++;
Alexei Starovoitov	8d3b7dc	2015-08-28 15:56:23 -0700	[diff] [blame]	120	} else if (fmt[i] == 'p' \|\| fmt[i] == 's') {
Alexei Starovoitov	9c959c8	2015-03-25 12:49:22 -0700	[diff] [blame]	121	mod[fmt_cnt]++;
Martynas Pumputis	ac86c99	2018-11-23 17:43:26 +0100	[diff] [blame]	122	/* disallow any further format extensions */
				123	if (fmt[i + 1] != 0 &&
				124	!isspace(fmt[i + 1]) &&
				125	!ispunct(fmt[i + 1]))
Alexei Starovoitov	9c959c8	2015-03-25 12:49:22 -0700	[diff] [blame]	126	return -EINVAL;
				127	fmt_cnt++;
Martynas Pumputis	ac86c99	2018-11-23 17:43:26 +0100	[diff] [blame]	128	if (fmt[i] == 's') {
Alexei Starovoitov	8d3b7dc	2015-08-28 15:56:23 -0700	[diff] [blame]	129	if (str_seen)
				130	/* allow only one '%s' per fmt string */
				131	return -EINVAL;
				132	str_seen = true;
				133
				134	switch (fmt_cnt) {
				135	case 1:
				136	unsafe_addr = r3;
				137	r3 = (long) buf;
				138	break;
				139	case 2:
				140	unsafe_addr = r4;
				141	r4 = (long) buf;
				142	break;
				143	case 3:
				144	unsafe_addr = r5;
				145	r5 = (long) buf;
				146	break;
				147	}
				148	buf[0] = 0;
				149	strncpy_from_unsafe(buf,
				150	(void *) (long) unsafe_addr,
				151	sizeof(buf));
				152	}
Alexei Starovoitov	9c959c8	2015-03-25 12:49:22 -0700	[diff] [blame]	153	continue;
				154	}
				155
				156	if (fmt[i] == 'l') {
				157	mod[fmt_cnt]++;
				158	i++;
				159	}
				160
				161	if (fmt[i] != 'd' && fmt[i] != 'u' && fmt[i] != 'x')
				162	return -EINVAL;
				163	fmt_cnt++;
				164	}
				165
				166	return __trace_printk(1/* fake ip will not be printed */, fmt,
				167	mod[0] == 2 ? r3 : mod[0] == 1 ? (long) r3 : (u32) r3,
				168	mod[1] == 2 ? r4 : mod[1] == 1 ? (long) r4 : (u32) r4,
				169	mod[2] == 2 ? r5 : mod[2] == 1 ? (long) r5 : (u32) r5);
				170	}
				171
				172	static const struct bpf_func_proto bpf_trace_printk_proto = {
				173	.func = bpf_trace_printk,
				174	.gpl_only = true,
				175	.ret_type = RET_INTEGER,
				176	.arg1_type = ARG_PTR_TO_STACK,
				177	.arg2_type = ARG_CONST_STACK_SIZE,
				178	};
				179
Alexei Starovoitov	0756ea3	2015-06-12 19:39:13 -0700	[diff] [blame]	180	const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
				181	{
				182	/*
				183	* this program might be calling bpf_trace_printk,
				184	* so allocate per-cpu printk buffers
				185	*/
				186	trace_printk_init_buffers();
				187
				188	return &bpf_trace_printk_proto;
				189	}
				190
Kaixu Xia	35578d7	2015-08-06 07:02:35 +0000	[diff] [blame]	191	static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5)
				192	{
				193	struct bpf_map map = (struct bpf_map ) (unsigned long) r1;
				194	struct bpf_array *array = container_of(map, struct bpf_array, map);
				195	struct perf_event *event;
				196
				197	if (unlikely(index >= array->map.max_entries))
				198	return -E2BIG;
				199
				200	event = (struct perf_event *)array->ptrs[index];
				201	if (!event)
				202	return -ENOENT;
				203
Alexei Starovoitov	62544ce	2015-10-22 17:10:14 -0700	[diff] [blame]	204	/* make sure event is local and doesn't have pmu::count */
				205	if (event->oncpu != smp_processor_id() \|\|
				206	event->pmu->count)
				207	return -EINVAL;
				208
Alexei Starovoitov	529e188	2016-06-15 18:25:39 -0700	[diff] [blame]	209	if (unlikely(event->attr.type != PERF_TYPE_HARDWARE &&
				210	event->attr.type != PERF_TYPE_RAW))
				211	return -EINVAL;
				212
Kaixu Xia	35578d7	2015-08-06 07:02:35 +0000	[diff] [blame]	213	/*
				214	* we don't know if the function is run successfully by the
				215	* return value. It can be judged in other places, such as
				216	* eBPF programs.
				217	*/
				218	return perf_event_read_local(event);
				219	}
				220
Alexei Starovoitov	62544ce	2015-10-22 17:10:14 -0700	[diff] [blame]	221	static const struct bpf_func_proto bpf_perf_event_read_proto = {
Kaixu Xia	35578d7	2015-08-06 07:02:35 +0000	[diff] [blame]	222	.func = bpf_perf_event_read,
Alexei Starovoitov	1075ef5	2015-10-23 14:58:19 -0700	[diff] [blame]	223	.gpl_only = true,
Kaixu Xia	35578d7	2015-08-06 07:02:35 +0000	[diff] [blame]	224	.ret_type = RET_INTEGER,
				225	.arg1_type = ARG_CONST_MAP_PTR,
				226	.arg2_type = ARG_ANYTHING,
				227	};
				228
Alexei Starovoitov	a43eec3	2015-10-20 20:02:34 -0700	[diff] [blame]	229	static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 index, u64 r4, u64 size)
				230	{
				231	struct pt_regs regs = (struct pt_regs ) (long) r1;
				232	struct bpf_map map = (struct bpf_map ) (long) r2;
				233	struct bpf_array *array = container_of(map, struct bpf_array, map);
				234	void data = (void ) (long) r4;
				235	struct perf_sample_data sample_data;
				236	struct perf_event *event;
				237	struct perf_raw_record raw = {
				238	.size = size,
				239	.data = data,
				240	};
				241
				242	if (unlikely(index >= array->map.max_entries))
				243	return -E2BIG;
				244
				245	event = (struct perf_event *)array->ptrs[index];
				246	if (unlikely(!event))
				247	return -ENOENT;
				248
				249	if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE \|\|
				250	event->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
				251	return -EINVAL;
				252
				253	if (unlikely(event->oncpu != smp_processor_id()))
				254	return -EOPNOTSUPP;
				255
				256	perf_sample_data_init(&sample_data, 0, 0);
				257	sample_data.raw = &raw;
				258	perf_event_output(event, &sample_data, regs);
				259	return 0;
				260	}
				261
				262	static const struct bpf_func_proto bpf_perf_event_output_proto = {
				263	.func = bpf_perf_event_output,
Alexei Starovoitov	1075ef5	2015-10-23 14:58:19 -0700	[diff] [blame]	264	.gpl_only = true,
Alexei Starovoitov	a43eec3	2015-10-20 20:02:34 -0700	[diff] [blame]	265	.ret_type = RET_INTEGER,
				266	.arg1_type = ARG_PTR_TO_CTX,
				267	.arg2_type = ARG_CONST_MAP_PTR,
				268	.arg3_type = ARG_ANYTHING,
				269	.arg4_type = ARG_PTR_TO_STACK,
				270	.arg5_type = ARG_CONST_STACK_SIZE,
				271	};
				272
Alexei Starovoitov	2541517	2015-03-25 12:49:20 -0700	[diff] [blame]	273	static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id)
				274	{
				275	switch (func_id) {
				276	case BPF_FUNC_map_lookup_elem:
				277	return &bpf_map_lookup_elem_proto;
				278	case BPF_FUNC_map_update_elem:
				279	return &bpf_map_update_elem_proto;
				280	case BPF_FUNC_map_delete_elem:
				281	return &bpf_map_delete_elem_proto;
				282	case BPF_FUNC_probe_read:
				283	return &bpf_probe_read_proto;
Alexei Starovoitov	d9847d3	2015-03-25 12:49:21 -0700	[diff] [blame]	284	case BPF_FUNC_ktime_get_ns:
				285	return &bpf_ktime_get_ns_proto;
Alexei Starovoitov	04fd61a	2015-05-19 16:59:03 -0700	[diff] [blame]	286	case BPF_FUNC_tail_call:
				287	return &bpf_tail_call_proto;
Alexei Starovoitov	ffeedaf	2015-06-12 19:39:12 -0700	[diff] [blame]	288	case BPF_FUNC_get_current_pid_tgid:
				289	return &bpf_get_current_pid_tgid_proto;
				290	case BPF_FUNC_get_current_uid_gid:
				291	return &bpf_get_current_uid_gid_proto;
				292	case BPF_FUNC_get_current_comm:
				293	return &bpf_get_current_comm_proto;
Alexei Starovoitov	9c959c8	2015-03-25 12:49:22 -0700	[diff] [blame]	294	case BPF_FUNC_trace_printk:
Alexei Starovoitov	0756ea3	2015-06-12 19:39:13 -0700	[diff] [blame]	295	return bpf_get_trace_printk_proto();
Alexei Starovoitov	ab1973d	2015-06-12 19:39:14 -0700	[diff] [blame]	296	case BPF_FUNC_get_smp_processor_id:
				297	return &bpf_get_smp_processor_id_proto;
Kaixu Xia	35578d7	2015-08-06 07:02:35 +0000	[diff] [blame]	298	case BPF_FUNC_perf_event_read:
				299	return &bpf_perf_event_read_proto;
Alexei Starovoitov	a43eec3	2015-10-20 20:02:34 -0700	[diff] [blame]	300	case BPF_FUNC_perf_event_output:
				301	return &bpf_perf_event_output_proto;
Alexei Starovoitov	2541517	2015-03-25 12:49:20 -0700	[diff] [blame]	302	default:
				303	return NULL;
				304	}
				305	}
				306
				307	/* bpf+kprobe programs can access fields of 'struct pt_regs' */
				308	static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type)
				309	{
				310	/* check bounds */
				311	if (off < 0 \|\| off >= sizeof(struct pt_regs))
				312	return false;
				313
				314	/* only read is allowed */
				315	if (type != BPF_READ)
				316	return false;
				317
				318	/* disallow misaligned access */
				319	if (off % size != 0)
				320	return false;
				321
				322	return true;
				323	}
				324
				325	static struct bpf_verifier_ops kprobe_prog_ops = {
				326	.get_func_proto = kprobe_prog_func_proto,
				327	.is_valid_access = kprobe_prog_is_valid_access,
				328	};
				329
				330	static struct bpf_prog_type_list kprobe_tl = {
				331	.ops = &kprobe_prog_ops,
				332	.type = BPF_PROG_TYPE_KPROBE,
				333	};
				334
				335	static int __init register_kprobe_prog_ops(void)
				336	{
				337	bpf_register_prog_type(&kprobe_tl);
				338	return 0;
				339	}
				340	late_initcall(register_kprobe_prog_ops);