blob: 1db5b375fcf49e4cb55701cad91ca4efbd5e2b54 [file] [log] [blame]
Waiman Long45e898b2015-11-09 19:09:25 -05001/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License as published by
4 * the Free Software Foundation; either version 2 of the License, or
5 * (at your option) any later version.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * Authors: Waiman Long <waiman.long@hpe.com>
13 */
14
15/*
16 * When queued spinlock statistical counters are enabled, the following
17 * debugfs files will be created for reporting the counter values:
18 *
19 * <debugfs>/qlockstat/
20 * pv_hash_hops - average # of hops per hashing operation
21 * pv_kick_unlock - # of vCPU kicks issued at unlock time
22 * pv_kick_wake - # of vCPU kicks used for computing pv_latency_wake
23 * pv_latency_kick - average latency (ns) of vCPU kick operation
24 * pv_latency_wake - average latency (ns) from vCPU kick to wakeup
Waiman Long1c4941f2015-11-10 16:18:56 -050025 * pv_lock_stealing - # of lock stealing operations
Waiman Long08be8f62016-05-31 12:53:47 -040026 * pv_spurious_wakeup - # of spurious wakeups in non-head vCPUs
27 * pv_wait_again - # of wait's after a queue head vCPU kick
Waiman Longcd0272f2015-11-09 19:09:27 -050028 * pv_wait_early - # of early vCPU wait's
Waiman Long45e898b2015-11-09 19:09:25 -050029 * pv_wait_head - # of vCPU wait's at the queue head
30 * pv_wait_node - # of vCPU wait's at a non-head queue node
Waiman Long81d3dc92018-04-26 11:34:27 +010031 * lock_pending - # of locking operations via pending code
32 * lock_slowpath - # of locking operations via MCS lock queue
Waiman Long412f34a2019-01-29 22:53:46 +010033 * lock_use_node2 - # of locking operations that use 2nd per-CPU node
34 * lock_use_node3 - # of locking operations that use 3rd per-CPU node
35 * lock_use_node4 - # of locking operations that use 4th per-CPU node
36 * lock_no_node - # of locking operations without using per-CPU node
37 *
38 * Subtracting lock_use_node[234] from lock_slowpath will give you
39 * lock_use_node1.
Waiman Long45e898b2015-11-09 19:09:25 -050040 *
Waiman Longad53fa12019-04-04 13:43:16 -040041 * Writing to the special ".reset_counts" file will reset all the above
42 * counter values.
Waiman Long45e898b2015-11-09 19:09:25 -050043 *
44 * These statistical counters are implemented as per-cpu variables which are
45 * summed and computed whenever the corresponding debugfs files are read. This
46 * minimizes added overhead making the counters usable even in a production
47 * environment.
48 *
49 * There may be slight difference between pv_kick_wake and pv_kick_unlock.
50 */
Waiman Longad53fa12019-04-04 13:43:16 -040051#include "lock_events.h"
Waiman Long45e898b2015-11-09 19:09:25 -050052
53#ifdef CONFIG_QUEUED_LOCK_STAT
54/*
55 * Collect pvqspinlock statistics
56 */
57#include <linux/debugfs.h>
58#include <linux/sched.h>
Ingo Molnare6017572017-02-01 16:36:40 +010059#include <linux/sched/clock.h>
Waiman Long45e898b2015-11-09 19:09:25 -050060#include <linux/fs.h>
61
/* Shorthand for one named event counter in the current scope's array */
#define EVENT_COUNT(ev)	lockevents[LOCKEVENT_ ## ev]

/*
 * Redefine LOCK_EVENT so that including lock_events_list.h below expands
 * each declared event into a designated-initializer entry mapping its
 * LOCKEVENT_* id to its name string (X-macro technique).
 */
#undef  LOCK_EVENT
#define LOCK_EVENT(name)	[LOCKEVENT_ ## name] = #name,

/*
 * Table of debugfs file names, one per event counter, plus one extra
 * slot (lockevent_num + 1) for the special ".reset_counts" file.
 */
static const char * const lockevent_names[lockevent_num + 1] = {

#include "lock_events_list.h"

	[LOCKEVENT_reset_cnts] = ".reset_counts",
};
73
/*
 * Per-cpu counters
 *
 * Each CPU keeps its own array of event counts; readers sum across all
 * possible CPUs, so no locking is needed on the increment fast path.
 */
DEFINE_PER_CPU(unsigned long, lockevents[lockevent_num]);
/* Timestamp written by __pv_kick() and consumed by __pv_wait() on wakeup */
static DEFINE_PER_CPU(u64, pv_kick_time);
79
80/*
81 * Function to read and return the qlock statistical counter values
82 *
83 * The following counters are handled specially:
Waiman Longad53fa12019-04-04 13:43:16 -040084 * 1. pv_latency_kick
Waiman Long45e898b2015-11-09 19:09:25 -050085 * Average kick latency (ns) = pv_latency_kick/pv_kick_unlock
Waiman Longad53fa12019-04-04 13:43:16 -040086 * 2. pv_latency_wake
Waiman Long45e898b2015-11-09 19:09:25 -050087 * Average wake latency (ns) = pv_latency_wake/pv_kick_wake
Waiman Longad53fa12019-04-04 13:43:16 -040088 * 3. pv_hash_hops
Waiman Long45e898b2015-11-09 19:09:25 -050089 * Average hops/hash = pv_hash_hops/pv_kick_unlock
90 */
Waiman Longad53fa12019-04-04 13:43:16 -040091static ssize_t lockevent_read(struct file *file, char __user *user_buf,
92 size_t count, loff_t *ppos)
Waiman Long45e898b2015-11-09 19:09:25 -050093{
94 char buf[64];
Waiman Longad53fa12019-04-04 13:43:16 -040095 int cpu, id, len;
96 u64 sum = 0, kicks = 0;
Waiman Long45e898b2015-11-09 19:09:25 -050097
98 /*
99 * Get the counter ID stored in file->f_inode->i_private
100 */
Waiman Longad53fa12019-04-04 13:43:16 -0400101 id = (long)file_inode(file)->i_private;
Waiman Long45e898b2015-11-09 19:09:25 -0500102
Waiman Longad53fa12019-04-04 13:43:16 -0400103 if (id >= lockevent_num)
Waiman Long45e898b2015-11-09 19:09:25 -0500104 return -EBADF;
105
106 for_each_possible_cpu(cpu) {
Waiman Longad53fa12019-04-04 13:43:16 -0400107 sum += per_cpu(lockevents[id], cpu);
Waiman Long45e898b2015-11-09 19:09:25 -0500108 /*
Waiman Longad53fa12019-04-04 13:43:16 -0400109 * Need to sum additional counters for some of them
Waiman Long45e898b2015-11-09 19:09:25 -0500110 */
Waiman Longad53fa12019-04-04 13:43:16 -0400111 switch (id) {
Waiman Long45e898b2015-11-09 19:09:25 -0500112
Waiman Longad53fa12019-04-04 13:43:16 -0400113 case LOCKEVENT_pv_latency_kick:
114 case LOCKEVENT_pv_hash_hops:
115 kicks += per_cpu(EVENT_COUNT(pv_kick_unlock), cpu);
Waiman Long45e898b2015-11-09 19:09:25 -0500116 break;
117
Waiman Longad53fa12019-04-04 13:43:16 -0400118 case LOCKEVENT_pv_latency_wake:
119 kicks += per_cpu(EVENT_COUNT(pv_kick_wake), cpu);
Waiman Long45e898b2015-11-09 19:09:25 -0500120 break;
121 }
122 }
123
Waiman Longad53fa12019-04-04 13:43:16 -0400124 if (id == LOCKEVENT_pv_hash_hops) {
Davidlohr Bueso66876592016-04-17 23:31:41 -0700125 u64 frac = 0;
Waiman Long45e898b2015-11-09 19:09:25 -0500126
Davidlohr Bueso66876592016-04-17 23:31:41 -0700127 if (kicks) {
Waiman Longad53fa12019-04-04 13:43:16 -0400128 frac = 100ULL * do_div(sum, kicks);
Davidlohr Bueso66876592016-04-17 23:31:41 -0700129 frac = DIV_ROUND_CLOSEST_ULL(frac, kicks);
130 }
Waiman Long45e898b2015-11-09 19:09:25 -0500131
132 /*
133 * Return a X.XX decimal number
134 */
Waiman Longad53fa12019-04-04 13:43:16 -0400135 len = snprintf(buf, sizeof(buf) - 1, "%llu.%02llu\n",
136 sum, frac);
Waiman Long45e898b2015-11-09 19:09:25 -0500137 } else {
138 /*
139 * Round to the nearest ns
140 */
Waiman Longad53fa12019-04-04 13:43:16 -0400141 if ((id == LOCKEVENT_pv_latency_kick) ||
142 (id == LOCKEVENT_pv_latency_wake)) {
Waiman Long45e898b2015-11-09 19:09:25 -0500143 if (kicks)
Waiman Longad53fa12019-04-04 13:43:16 -0400144 sum = DIV_ROUND_CLOSEST_ULL(sum, kicks);
Waiman Long45e898b2015-11-09 19:09:25 -0500145 }
Waiman Longad53fa12019-04-04 13:43:16 -0400146 len = snprintf(buf, sizeof(buf) - 1, "%llu\n", sum);
Waiman Long45e898b2015-11-09 19:09:25 -0500147 }
148
149 return simple_read_from_buffer(user_buf, count, ppos, buf, len);
150}
151
152/*
153 * Function to handle write request
154 *
Waiman Longad53fa12019-04-04 13:43:16 -0400155 * When id = .reset_cnts, reset all the counter values.
Waiman Long45e898b2015-11-09 19:09:25 -0500156 */
Waiman Longad53fa12019-04-04 13:43:16 -0400157static ssize_t lockevent_write(struct file *file, const char __user *user_buf,
Waiman Long45e898b2015-11-09 19:09:25 -0500158 size_t count, loff_t *ppos)
159{
160 int cpu;
161
162 /*
163 * Get the counter ID stored in file->f_inode->i_private
164 */
Waiman Longad53fa12019-04-04 13:43:16 -0400165 if ((long)file_inode(file)->i_private != LOCKEVENT_reset_cnts)
Waiman Long45e898b2015-11-09 19:09:25 -0500166 return count;
167
168 for_each_possible_cpu(cpu) {
169 int i;
Waiman Longad53fa12019-04-04 13:43:16 -0400170 unsigned long *ptr = per_cpu_ptr(lockevents, cpu);
Waiman Long45e898b2015-11-09 19:09:25 -0500171
Waiman Longad53fa12019-04-04 13:43:16 -0400172 for (i = 0 ; i < lockevent_num; i++)
Waiman Long45e898b2015-11-09 19:09:25 -0500173 WRITE_ONCE(ptr[i], 0);
Waiman Long45e898b2015-11-09 19:09:25 -0500174 }
175 return count;
176}
177
/*
 * Debugfs data structures
 *
 * One shared file_operations instance serves every counter file; the
 * per-file counter ID lives in the inode's i_private instead.
 */
static const struct file_operations fops_lockevent = {
	.read = lockevent_read,
	.write = lockevent_write,
	.llseek = default_llseek,
};
186
/*
 * Initialize debugfs for the qspinlock statistical counters
 *
 * Creates <debugfs>/qlockstat/ with one read-only (0400) file per event
 * counter and a write-only (0200) ".reset_counts" file.  Uses the
 * classic goto-cleanup pattern: any mid-way failure tears down the whole
 * directory tree.  Returns 0 on success, -ENOMEM on any failure.
 */
static int __init init_qspinlock_stat(void)
{
	struct dentry *d_counts = debugfs_create_dir("qlockstat", NULL);
	int i;

	/* NOTE(review): NULL-return check matches this debugfs vintage —
	 * newer kernels return ERR_PTR and discourage checking at all. */
	if (!d_counts)
		goto out;

	/*
	 * Create the debugfs files
	 *
	 * As reading from and writing to the stat files can be slow, only
	 * root is allowed to do the read/write to limit impact to system
	 * performance.
	 */
	for (i = 0; i < lockevent_num; i++)
		if (!debugfs_create_file(lockevent_names[i], 0400, d_counts,
					 (void *)(long)i, &fops_lockevent))
			goto fail_undo;

	/* The reset file is the extra slot past lockevent_num in the table */
	if (!debugfs_create_file(lockevent_names[LOCKEVENT_reset_cnts], 0200,
				 d_counts, (void *)(long)LOCKEVENT_reset_cnts,
				 &fops_lockevent))
		goto fail_undo;

	return 0;
fail_undo:
	debugfs_remove_recursive(d_counts);
out:
	pr_warn("Could not create 'qlockstat' debugfs entries\n");
	return -ENOMEM;
}
fs_initcall(init_qspinlock_stat);
223
/*
 * PV hash hop count
 *
 * Add @hopcnt hops from one hashing operation to this CPU's
 * pv_hash_hops counter.
 */
static inline void lockevent_pv_hop(int hopcnt)
{
	this_cpu_add(EVENT_COUNT(pv_hash_hops), hopcnt);
}
231
/*
 * Replacement function for pv_kick()
 *
 * Records the kick timestamp in the *target* CPU's pv_kick_time before
 * kicking, so the woken vCPU can compute its wakeup latency in
 * __pv_wait(); also accumulates the kick-call latency on this CPU.
 */
static inline void __pv_kick(int cpu)
{
	u64 start = sched_clock();

	/* Must be stored before pv_kick() so the wakee sees it */
	per_cpu(pv_kick_time, cpu) = start;
	pv_kick(cpu);
	this_cpu_add(EVENT_COUNT(pv_latency_kick), sched_clock() - start);
}
243
/*
 * Replacement function for pv_wait()
 *
 * Clears this CPU's pv_kick_time before sleeping; if it is non-zero on
 * return, a kicker (__pv_kick) stamped it, so charge the kick-to-wakeup
 * latency and count the kick-induced wake.  A zero value means the
 * wakeup was spurious or otherwise not via __pv_kick().
 */
static inline void __pv_wait(u8 *ptr, u8 val)
{
	u64 *pkick_time = this_cpu_ptr(&pv_kick_time);

	*pkick_time = 0;
	/* Calls the real pv_wait(): the redirecting macro is defined below */
	pv_wait(ptr, val);
	if (*pkick_time) {
		this_cpu_add(EVENT_COUNT(pv_latency_wake),
			     sched_clock() - *pkick_time);
		lockevent_inc(pv_kick_wake);
	}
}

/* From here on, pv_kick()/pv_wait() users get the instrumented wrappers */
#define pv_kick(c)	__pv_kick(c)
#define pv_wait(p, v)	__pv_wait(p, v)
262
Waiman Long45e898b2015-11-09 19:09:25 -0500263#else /* CONFIG_QUEUED_LOCK_STAT */
264
/* Stats disabled: hop accounting compiles away to nothing */
static inline void lockevent_pv_hop(int hopcnt) { }
Waiman Long45e898b2015-11-09 19:09:25 -0500266
267#endif /* CONFIG_QUEUED_LOCK_STAT */