Arvind M | 8e87d85 | 2018-01-29 00:04:29 -0800 | [diff] [blame] | 1 | From a9ac12caf8ce183c5b08e9bad183fc0196985da0 Mon Sep 17 00:00:00 2001 |
Allen Martin | 685e0f8 | 2016-07-26 19:34:29 -0700 | [diff] [blame] | 2 | From: Carsten Emde <C.Emde@osadl.org> |
| 3 | Date: Tue, 19 Jul 2011 13:53:12 +0100 |
Arvind M | 10268e7 | 2017-12-04 22:18:06 -0800 | [diff] [blame] | 4 | Subject: [PATCH 024/366] hwlatdetect.patch |
Allen Martin | 685e0f8 | 2016-07-26 19:34:29 -0700 | [diff] [blame] | 5 | |
| 6 | Jon Masters developed this wonderful SMI detector. For details please |
| 7 | consult Documentation/hwlat_detector.txt. It could be ported to Linux |
| 8 | 3.0 RT without any major change. |
| 9 | |
| 10 | Signed-off-by: Carsten Emde <C.Emde@osadl.org> |
Allen Martin | 685e0f8 | 2016-07-26 19:34:29 -0700 | [diff] [blame] | 11 | --- |
| 12 | Documentation/hwlat_detector.txt | 64 ++ |
| 13 | drivers/misc/Kconfig | 29 + |
| 14 | drivers/misc/Makefile | 1 + |
| 15 | drivers/misc/hwlat_detector.c | 1212 ++++++++++++++++++++++++++++++++++++++ |
| 16 | 4 files changed, 1306 insertions(+) |
| 17 | create mode 100644 Documentation/hwlat_detector.txt |
| 18 | create mode 100644 drivers/misc/hwlat_detector.c |
| 19 | |
| 20 | diff --git a/Documentation/hwlat_detector.txt b/Documentation/hwlat_detector.txt |
| 21 | new file mode 100644 |
Ishan Mittal | b799826 | 2017-01-17 16:11:50 +0530 | [diff] [blame] | 22 | index 0000000..cb61516 |
Allen Martin | 685e0f8 | 2016-07-26 19:34:29 -0700 | [diff] [blame] | 23 | --- /dev/null |
| 24 | +++ b/Documentation/hwlat_detector.txt |
| 25 | @@ -0,0 +1,64 @@ |
| 26 | +Introduction: |
| 27 | +------------- |
| 28 | + |
| 29 | +The module hwlat_detector is a special purpose kernel module that is used to |
| 30 | +detect large system latencies induced by the behavior of certain underlying |
| 31 | +hardware or firmware, independent of Linux itself. The code was developed |
| 32 | +originally to detect SMIs (System Management Interrupts) on x86 systems, |
| 33 | +however there is nothing x86 specific about this patchset. It was |
| 34 | +originally written for use by the "RT" patch since the Real Time |
| 35 | +kernel is highly latency sensitive. |
| 36 | + |
| 37 | +SMIs are usually not serviced by the Linux kernel, which typically does not |
| 38 | +even know that they are occurring. SMIs are instead set up by BIOS code |
| 39 | +and are serviced by BIOS code, usually for "critical" events such as |
| 40 | +management of thermal sensors and fans. Sometimes though, SMIs are used for |
| 41 | +other tasks and those tasks can spend an inordinate amount of time in the |
| 42 | +handler (sometimes measured in milliseconds). Obviously this is a problem if |
| 43 | +you are trying to keep event service latencies down in the microsecond range. |
| 44 | + |
| 45 | +The hardware latency detector works by hogging all of the cpus for configurable |
| 46 | +amounts of time (by calling stop_machine()), polling the CPU Time Stamp Counter |
| 47 | +for some period, then looking for gaps in the TSC data. Any gap indicates a |
| 48 | +time when the polling was interrupted and since the machine is stopped and |
| 49 | +interrupts turned off the only thing that could do that would be an SMI. |
| 50 | + |
| 51 | +Note that the SMI detector should *NEVER* be used in a production environment. |
| 52 | +It is intended to be run manually to determine if the hardware platform has a |
| 53 | +problem with long system firmware service routines. |
| 54 | + |
| 55 | +Usage: |
| 56 | +------ |
| 57 | + |
| 58 | +Loading the module hwlat_detector with the parameter "enabled=1" (or writing |
| 59 | +1 to the "enable" entry in the "hwlat_detector" debugfs directory) is the only |
| 60 | +step required to start the hwlat_detector. It is possible to redefine the |
| 61 | +threshold in microseconds (us) above which latency spikes will be taken |
| 62 | +into account (parameter "threshold="). |
| 63 | + |
| 64 | +Example: |
| 65 | + |
| 66 | + # modprobe hwlat_detector enabled=1 threshold=100 |
| 67 | + |
| 68 | +After the module is loaded, it creates a directory named "hwlat_detector" under |
| 69 | +the debugfs mountpoint, "/debug/hwlat_detector" for this text. It is necessary |
| 70 | +to have debugfs mounted, which might be on /sys/kernel/debug on your system. |
| 71 | + |
| 72 | +The /debug/hwlat_detector interface contains the following files: |
| 73 | + |
| 74 | +count - number of latency spikes observed since last reset |
| 75 | +enable - a global enable/disable toggle (0/1), resets count |
| 76 | +max - maximum hardware latency actually observed (usecs) |
| 77 | +sample - a pipe from which to read current raw sample data |
| 78 | + in the format <timestamp> <latency observed usecs> |
| 79 | + (can be opened O_NONBLOCK for a single sample) |
| 80 | +threshold - minimum latency value to be considered (usecs) |
| 81 | +width - time period to sample with CPUs held (usecs) |
| 82 | + must be less than the total window size (enforced) |
| 83 | +window - total period of sampling, width being inside (usecs) |
| 84 | + |
| 85 | +By default we will set width to 500,000 and window to 1,000,000, meaning that |
| 86 | +we will sample every 1,000,000 usecs (1s) for 500,000 usecs (0.5s). If we |
| 87 | +observe any latencies that exceed the threshold (initially 10 usecs), |
| 88 | +then we write to a global sample ring buffer of 8K samples, which is |
| 89 | +consumed by reading from the "sample" (pipe) debugfs file interface. |
| 90 | diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig |
Arvind M | 8e87d85 | 2018-01-29 00:04:29 -0800 | [diff] [blame] | 91 | index 7dcd598..79f1ed0 100644 |
Allen Martin | 685e0f8 | 2016-07-26 19:34:29 -0700 | [diff] [blame] | 92 | --- a/drivers/misc/Kconfig |
| 93 | +++ b/drivers/misc/Kconfig |
| 94 | @@ -121,6 +121,35 @@ config IBM_ASM |
| 95 | for information on the specific driver level and support statement |
| 96 | for your IBM server. |
| 97 | |
| 98 | +config HWLAT_DETECTOR |
| 99 | + tristate "Testing module to detect hardware-induced latencies" |
| 100 | + depends on DEBUG_FS |
| 101 | + depends on RING_BUFFER |
| 102 | + default m |
| 103 | + ---help--- |
| 104 | + A simple hardware latency detector. Use this module to detect |
| 105 | + large latencies introduced by the behavior of the underlying |
| 106 | + system firmware external to Linux. We do this using periodic |
| 107 | + use of stop_machine to grab all available CPUs and measure |
| 108 | + for unexplainable gaps in the CPU timestamp counter(s). By |
| 109 | + default, the module is not enabled until the "enable" file |
| 110 | + within the "hwlat_detector" debugfs directory is toggled. |
| 111 | + |
| 112 | + This module is often used to detect SMI (System Management |
| 113 | + Interrupts) on x86 systems, though is not x86 specific. To |
| 114 | + this end, we default to using a sample window of 1 second, |
| 115 | + during which we will sample for 0.5 seconds. If an SMI or |
| 116 | + similar event occurs during that time, it is recorded |
| 117 | + into an 8K samples global ring buffer until retrieved. |
| 118 | + |
| 119 | + WARNING: This software should never be enabled (it can be built |
| 120 | + but should not be turned on after it is loaded) in a production |
| 121 | + environment where high latencies are a concern since the |
| 122 | + sampling mechanism actually introduces latencies for |
| 123 | + regular tasks while the CPU(s) are being held. |
| 124 | + |
| 125 | + If unsure, say N |
| 126 | + |
| 127 | config PHANTOM |
| 128 | tristate "Sensable PHANToM (PCI)" |
| 129 | depends on PCI |
| 130 | diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile |
Arvind M | 8e87d85 | 2018-01-29 00:04:29 -0800 | [diff] [blame] | 131 | index fb917fb..ef5ae7e 100644 |
Allen Martin | 685e0f8 | 2016-07-26 19:34:29 -0700 | [diff] [blame] | 132 | --- a/drivers/misc/Makefile |
| 133 | +++ b/drivers/misc/Makefile |
Arvind M | 10268e7 | 2017-12-04 22:18:06 -0800 | [diff] [blame] | 134 | @@ -42,6 +42,7 @@ obj-$(CONFIG_C2PORT) += c2port/ |
Allen Martin | 685e0f8 | 2016-07-26 19:34:29 -0700 | [diff] [blame] | 135 | obj-$(CONFIG_HMC6352) += hmc6352.o |
| 136 | obj-y += eeprom/ |
| 137 | obj-y += cb710/ |
| 138 | +obj-$(CONFIG_HWLAT_DETECTOR) += hwlat_detector.o |
| 139 | obj-$(CONFIG_SPEAR13XX_PCIE_GADGET) += spear13xx_pcie_gadget.o |
| 140 | obj-$(CONFIG_VMWARE_BALLOON) += vmw_balloon.o |
| 141 | obj-$(CONFIG_ARM_CHARLCD) += arm-charlcd.o |
| 142 | diff --git a/drivers/misc/hwlat_detector.c b/drivers/misc/hwlat_detector.c |
| 143 | new file mode 100644 |
Ishan Mittal | b799826 | 2017-01-17 16:11:50 +0530 | [diff] [blame] | 144 | index 0000000..6864f3c |
Allen Martin | 685e0f8 | 2016-07-26 19:34:29 -0700 | [diff] [blame] | 145 | --- /dev/null |
| 146 | +++ b/drivers/misc/hwlat_detector.c |
| 147 | @@ -0,0 +1,1212 @@ |
| 148 | +/* |
| 149 | + * hwlat_detector.c - A simple Hardware Latency detector. |
| 150 | + * |
| 151 | + * Use this module to detect large system latencies induced by the behavior of |
| 152 | + * certain underlying system hardware or firmware, independent of Linux itself. |
| 153 | + * The code was developed originally to detect the presence of SMIs on Intel |
| 154 | + * and AMD systems, although there is no dependency upon x86 herein. |
| 155 | + * |
| 156 | + * The classical example usage of this module is in detecting the presence of |
| 157 | + * SMIs or System Management Interrupts on Intel and AMD systems. An SMI is a |
| 158 | + * somewhat special form of hardware interrupt spawned from earlier CPU debug |
| 159 | + * modes in which the (BIOS/EFI/etc.) firmware arranges for the South Bridge |
| 160 | + * LPC (or other device) to generate a special interrupt under certain |
| 161 | + * circumstances, for example, upon expiration of a special SMI timer device, |
| 162 | + * due to certain external thermal readings, on certain I/O address accesses, |
| 163 | + * and other situations. An SMI hits a special CPU pin, triggers a special |
| 164 | + * SMI mode (complete with special memory map), and the OS is unaware. |
| 165 | + * |
| 166 | + * Although certain hardware-induced latencies are necessary (for example, |
| 167 | + * a modern system often requires an SMI handler for correct thermal control |
| 168 | + * and remote management) they can wreak havoc upon any OS-level performance |
| 169 | + * guarantees toward low-latency, especially when the OS is not even made |
| 170 | + * aware of the presence of these interrupts. For this reason, we need a |
| 171 | + * somewhat brute force mechanism to detect these interrupts. In this case, |
| 172 | + * we do it by hogging all of the CPU(s) for configurable timer intervals, |
| 173 | + * sampling the built-in CPU timer, looking for discontiguous readings. |
| 174 | + * |
| 175 | + * WARNING: This implementation necessarily introduces latencies. Therefore, |
| 176 | + * you should NEVER use this module in a production environment |
| 177 | + * requiring any kind of low-latency performance guarantee(s). |
| 178 | + * |
| 179 | + * Copyright (C) 2008-2009 Jon Masters, Red Hat, Inc. <jcm@redhat.com> |
| 180 | + * |
| 181 | + * Includes useful feedback from Clark Williams <clark@redhat.com> |
| 182 | + * |
| 183 | + * This file is licensed under the terms of the GNU General Public |
| 184 | + * License version 2. This program is licensed "as is" without any |
| 185 | + * warranty of any kind, whether express or implied. |
| 186 | + */ |
| 187 | + |
| 188 | +#include <linux/module.h> |
| 189 | +#include <linux/init.h> |
| 190 | +#include <linux/ring_buffer.h> |
| 191 | +#include <linux/stop_machine.h> |
| 192 | +#include <linux/time.h> |
| 193 | +#include <linux/hrtimer.h> |
| 194 | +#include <linux/kthread.h> |
| 195 | +#include <linux/debugfs.h> |
| 196 | +#include <linux/seq_file.h> |
| 197 | +#include <linux/uaccess.h> |
| 198 | +#include <linux/version.h> |
| 199 | +#include <linux/delay.h> |
| 200 | +#include <linux/slab.h> |
| 201 | + |
| 202 | +#define BUF_SIZE_DEFAULT 262144UL /* 8K*(sizeof(entry)) */ |
| 203 | +#define BUF_FLAGS (RB_FL_OVERWRITE) /* no block on full */ |
| 204 | +#define U64STR_SIZE 22 /* 20 digits max */ |
| 205 | + |
| 206 | +#define VERSION "1.0.0" |
| 207 | +#define BANNER "hwlat_detector: " |
| 208 | +#define DRVNAME "hwlat_detector" |
| 209 | +#define DEFAULT_SAMPLE_WINDOW 1000000 /* 1s */ |
| 210 | +#define DEFAULT_SAMPLE_WIDTH 500000 /* 0.5s */ |
| 211 | +#define DEFAULT_LAT_THRESHOLD 10 /* 10us */ |
| 212 | + |
| 213 | +/* Module metadata */ |
| 214 | + |
| 215 | +MODULE_LICENSE("GPL"); |
| 216 | +MODULE_AUTHOR("Jon Masters <jcm@redhat.com>"); |
| 217 | +MODULE_DESCRIPTION("A simple hardware latency detector"); |
| 218 | +MODULE_VERSION(VERSION); |
| 219 | + |
| 220 | +/* Module parameters */ |
| 221 | + |
| 222 | +static int debug; |
| 223 | +static int enabled; |
| 224 | +static int threshold; |
| 225 | + |
| 226 | +module_param(debug, int, 0); /* enable debug */ |
| 227 | +module_param(enabled, int, 0); /* enable detector */ |
| 228 | +module_param(threshold, int, 0); /* latency threshold */ |
| 229 | + |
| 230 | +/* Buffering and sampling */ |
| 231 | + |
| 232 | +static struct ring_buffer *ring_buffer; /* sample buffer */ |
| 233 | +static DEFINE_MUTEX(ring_buffer_mutex); /* lock changes */ |
| 234 | +static unsigned long buf_size = BUF_SIZE_DEFAULT; |
| 235 | +static struct task_struct *kthread; /* sampling thread */ |
| 236 | + |
| 237 | +/* DebugFS filesystem entries */ |
| 238 | + |
| 239 | +static struct dentry *debug_dir; /* debugfs directory */ |
| 240 | +static struct dentry *debug_max; /* maximum TSC delta */ |
| 241 | +static struct dentry *debug_count; /* total detect count */ |
| 242 | +static struct dentry *debug_sample_width; /* sample width us */ |
| 243 | +static struct dentry *debug_sample_window; /* sample window us */ |
| 244 | +static struct dentry *debug_sample; /* raw samples us */ |
| 245 | +static struct dentry *debug_threshold; /* threshold us */ |
| 246 | +static struct dentry *debug_enable; /* enable/disable */ |
| 247 | + |
| 248 | +/* Individual samples and global state */ |
| 249 | + |
| 250 | +struct sample; /* latency sample */ |
| 251 | +struct data; /* Global state */ |
| 252 | + |
| 253 | +/* Sampling functions */ |
| 254 | +static int __buffer_add_sample(struct sample *sample); |
| 255 | +static struct sample *buffer_get_sample(struct sample *sample); |
| 256 | +static int get_sample(void *unused); |
| 257 | + |
| 258 | +/* Threading and state */ |
| 259 | +static int kthread_fn(void *unused); |
| 260 | +static int start_kthread(void); |
| 261 | +static int stop_kthread(void); |
| 262 | +static void __reset_stats(void); |
| 263 | +static int init_stats(void); |
| 264 | + |
| 265 | +/* Debugfs interface */ |
| 266 | +static ssize_t simple_data_read(struct file *filp, char __user *ubuf, |
| 267 | + size_t cnt, loff_t *ppos, const u64 *entry); |
| 268 | +static ssize_t simple_data_write(struct file *filp, const char __user *ubuf, |
| 269 | + size_t cnt, loff_t *ppos, u64 *entry); |
| 270 | +static int debug_sample_fopen(struct inode *inode, struct file *filp); |
| 271 | +static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf, |
| 272 | + size_t cnt, loff_t *ppos); |
| 273 | +static int debug_sample_release(struct inode *inode, struct file *filp); |
| 274 | +static int debug_enable_fopen(struct inode *inode, struct file *filp); |
| 275 | +static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf, |
| 276 | + size_t cnt, loff_t *ppos); |
| 277 | +static ssize_t debug_enable_fwrite(struct file *file, |
| 278 | + const char __user *user_buffer, |
| 279 | + size_t user_size, loff_t *offset); |
| 280 | + |
| 281 | +/* Initialization functions */ |
| 282 | +static int init_debugfs(void); |
| 283 | +static void free_debugfs(void); |
| 284 | +static int detector_init(void); |
| 285 | +static void detector_exit(void); |
| 286 | + |
| 287 | +/* Individual latency samples are stored here when detected and packed into |
| 288 | + * the ring_buffer circular buffer, where they are overwritten when |
| 289 | + * more than buf_size/sizeof(sample) samples are received. */ |
| 290 | +struct sample { |
| 291 | + u64 seqnum; /* unique sequence (data.count when detected) */ |
| 292 | + u64 duration; /* largest ktime delta seen, in usecs */ |
| 293 | + struct timespec timestamp; /* wall time (CURRENT_TIME) */ |
| 294 | + unsigned long lost; /* passed to ring_buffer_consume() on read */ |
| 295 | +}; |
| 296 | + |
| 297 | +/* keep the global state somewhere. Mostly used under stop_machine. */ |
| 298 | +static struct data { |
| 299 | + |
| 300 | + struct mutex lock; /* protect changes */ |
| 301 | + |
| 302 | + u64 count; /* total since reset */ |
| 303 | + u64 max_sample; /* max hardware latency (usecs) */ |
| 304 | + u64 threshold; /* sample threshold level (usecs) */ |
| 305 | + |
| 306 | + u64 sample_window; /* total sampling window (on+off), usecs */ |
| 307 | + u64 sample_width; /* active sampling portion of window, usecs */ |
| 308 | + |
| 309 | + atomic_t sample_open; /* whether the sample file is open */ |
| 310 | + |
| 311 | + wait_queue_head_t wq; /* waitqueue woken after each sampling pass */ |
| 312 | + |
| 313 | +} data; |
| 314 | + |
| 315 | +/** |
| 316 | + * __buffer_add_sample - add a new latency sample recording to the ring buffer |
| 317 | + * @sample: The new latency sample value |
| 318 | + * |
| 319 | + * Copies @sample into the global ring buffer and returns the result of |
| 320 | + * ring_buffer_write(). No locking is done here - suited for stop_machine use. |
| 321 | + */ |
| 322 | +static int __buffer_add_sample(struct sample *sample) |
| 323 | +{ |
| 324 | + return ring_buffer_write(ring_buffer, |
| 325 | + sizeof(struct sample), sample); |
| 326 | +} |
| 327 | + |
| 328 | +/** |
| 329 | + * buffer_get_sample - remove a hardware latency sample from the ring buffer |
| 330 | + * @sample: Pre-allocated storage for the sample |
| 331 | + * |
| 332 | + * Returns @sample filled from the first online CPU with data, or NULL if none. |
| 333 | + */ |
| 334 | +static struct sample *buffer_get_sample(struct sample *sample) |
| 335 | +{ |
| 336 | + struct ring_buffer_event *e = NULL; |
| 337 | + struct sample *s = NULL; |
| 338 | + unsigned int cpu = 0; |
| 339 | + |
| 340 | + if (!sample) |
| 341 | + return NULL; |
| 342 | + |
| 343 | + mutex_lock(&ring_buffer_mutex); |
| 344 | + for_each_online_cpu(cpu) { |
| 345 | + e = ring_buffer_consume(ring_buffer, cpu, NULL, &sample->lost); |
| 346 | + if (e) |
| 347 | + break; /* take only one event per call */ |
| 348 | + } |
| 349 | + |
| 350 | + if (e) { |
| 351 | + s = ring_buffer_event_data(e); |
| 352 | + memcpy(sample, s, sizeof(struct sample)); /* NOTE(review): also overwrites ->lost */ |
| 353 | + } else |
| 354 | + sample = NULL; /* nothing queued on any online CPU */ |
| 355 | + mutex_unlock(&ring_buffer_mutex); |
| 356 | + |
| 357 | + return sample; |
| 358 | +} |
| 359 | + |
| 360 | +/** |
| 361 | + * get_sample - sample the clock and look for likely hardware latencies |
| 362 | + * @unused: This is not used but is a part of the stop_machine API |
| 363 | + * |
| 364 | + * Reads ktime_get() back-to-back (not the raw TSC), looking for potential |
| 365 | + * hardware-induced latency. Called under stop_machine, with data.lock held. |
| 366 | + */ |
| 367 | +static int get_sample(void *unused) |
| 368 | +{ |
| 369 | + ktime_t start, t1, t2; |
| 370 | + s64 diff, total = 0; |
| 371 | + u64 sample = 0; |
| 372 | + int ret = 1; /* returned if time runs backwards */ |
| 373 | + |
| 374 | + start = ktime_get(); /* start timestamp */ |
| 375 | + |
| 376 | + do { |
| 377 | + |
| 378 | + t1 = ktime_get(); /* we'll look for a discontinuity */ |
| 379 | + t2 = ktime_get(); /* read immediately after t1 */ |
| 380 | + |
| 381 | + total = ktime_to_us(ktime_sub(t2, start)); /* sample width */ |
| 382 | + diff = ktime_to_us(ktime_sub(t2, t1)); /* current diff */ |
| 383 | + |
| 384 | + /* This shouldn't happen */ |
| 385 | + if (diff < 0) { |
| 386 | + pr_err(BANNER "time running backwards\n"); |
| 387 | + goto out; |
| 388 | + } |
| 389 | + |
| 390 | + if (diff > sample) |
| 391 | + sample = diff; /* only want highest value */ |
| 392 | + |
| 393 | + } while (total <= data.sample_width); |
| 394 | + |
| 395 | + /* If we exceed the threshold value, we have found a hardware latency */ |
| 396 | + if (sample > data.threshold) { |
| 397 | + struct sample s; /* NOTE(review): s.lost is never set */ |
| 398 | + |
| 399 | + data.count++; |
| 400 | + s.seqnum = data.count; |
| 401 | + s.duration = sample; |
| 402 | + s.timestamp = CURRENT_TIME; |
| 403 | + __buffer_add_sample(&s); /* return value is ignored */ |
| 404 | + |
| 405 | + /* Keep a running maximum ever recorded hardware latency */ |
| 406 | + if (sample > data.max_sample) |
| 407 | + data.max_sample = sample; |
| 408 | + } |
| 409 | + |
| 410 | + ret = 0; /* sampling pass completed */ |
| 411 | +out: |
| 412 | + return ret; |
| 413 | +} |
| 414 | + |
| 415 | +/* |
| 416 | + * kthread_fn - The CPU time sampling/hardware latency detection kernel thread |
| 417 | + * @unused: A required part of the kthread API. |
| 418 | + * |
| 419 | + * Used to periodically sample the clock via a call to get_sample. We |
| 420 | + * use stop_machine, which does (intentionally) introduce latency since we |
| 421 | + * need to ensure nothing else might be running (and thus pre-empting). |
| 422 | + * Obviously this should never be used in production environments. |
| 423 | + * |
| 424 | + * stop_machine will schedule us typically only on CPU0 which is fine for |
| 425 | + * almost every real-world hardware latency situation - but we might later |
| 426 | + * generalize this if we find there are any actual systems with alternate |
| 427 | + * SMI delivery or other non CPU0 hardware latencies. |
| 428 | + */ |
| 429 | +static int kthread_fn(void *unused) |
| 430 | +{ |
| 431 | + int err = 0; |
| 432 | + u64 interval = 0; |
| 433 | + |
| 434 | + while (!kthread_should_stop()) { |
| 435 | + |
| 436 | + mutex_lock(&data.lock); |
| 437 | + |
| 438 | + err = stop_machine(get_sample, unused, 0); |
| 439 | + if (err) { |
| 440 | + /* non-zero result: drop the lock and disable */ |
| 441 | + mutex_unlock(&data.lock); |
| 442 | + goto err_out; |
| 443 | + } |
| 444 | + |
| 445 | + wake_up(&data.wq); /* wake up reader(s) */ |
| 446 | + |
| 447 | + interval = data.sample_window - data.sample_width; /* idle part, us */ |
| 448 | + do_div(interval, USEC_PER_MSEC); /* modifies interval value */ |
| 449 | + |
| 450 | + mutex_unlock(&data.lock); |
| 451 | + |
| 452 | + if (msleep_interruptible(interval)) /* non-zero: leave the loop */ |
| 453 | + goto out; |
| 454 | + } |
| 455 | + goto out; |
| 456 | +err_out: |
| 457 | + pr_err(BANNER "could not call stop_machine, disabling\n"); |
| 458 | + enabled = 0; |
| 459 | +out: |
| 460 | + return err; |
| 461 | + |
| 462 | +} |
| 463 | + |
| 464 | +/** |
| 465 | + * start_kthread - Kick off the hardware latency sampling/detector kthread |
| 466 | + * |
| 467 | + * This starts the kernel thread (kthread_fn) that looks for hardware latencies. |
| 468 | + * Returns 0 on success; on failure clears "enabled" and returns -ENOMEM. |
| 469 | + */ |
| 470 | +static int start_kthread(void) |
| 471 | +{ |
| 472 | + kthread = kthread_run(kthread_fn, NULL, |
| 473 | + DRVNAME); |
| 474 | + if (IS_ERR(kthread)) { |
| 475 | + pr_err(BANNER "could not start sampling thread\n"); |
| 476 | + enabled = 0; |
| 477 | + return -ENOMEM; /* NOTE(review): PTR_ERR(kthread) is discarded */ |
| 478 | + } |
| 479 | + |
| 480 | + return 0; |
| 481 | +} |
| 482 | + |
| 483 | +/** |
| 484 | + * stop_kthread - Inform the hardware latency sampling/detector kthread to stop |
| 485 | + * |
| 486 | + * This kicks the running hardware latency sampling/detector kernel thread and |
| 487 | + * tells it to stop sampling now. Use this on unload and at system shutdown. |
| 488 | + */ |
| 489 | +static int stop_kthread(void) |
| 490 | +{ |
| 491 | + int ret; |
| 492 | + |
| 493 | + ret = kthread_stop(kthread); |
| 494 | + |
| 495 | + return ret; /* whatever kthread_stop() reported */ |
| 496 | +} |
| 497 | + |
| 498 | +/** |
| 499 | + * __reset_stats - Reset statistics for the hardware latency detector |
| 500 | + * |
| 501 | + * We use data to store various statistics and global state. We call this |
| 502 | + * function in order to reset those when "enable" is toggled on or off, and |
| 503 | + * also at initialization. Should be called with data.lock held. |
| 504 | + */ |
| 505 | +static void __reset_stats(void) |
| 506 | +{ |
| 507 | + data.count = 0; /* no spikes seen yet */ |
| 508 | + data.max_sample = 0; /* no maximum recorded yet */ |
| 509 | + ring_buffer_reset(ring_buffer); /* flush out old sample entries */ |
| 510 | +} |
| 511 | + |
| 512 | +/** |
| 513 | + * init_stats - Setup global state statistics for the hardware latency detector |
| 514 | + * |
| 515 | + * We use data to store various statistics and global state. We also use |
| 516 | + * a global ring buffer (ring_buffer) to keep raw samples of detected hardware |
| 517 | + * induced system latencies. This function initializes these structures and |
| 518 | + * allocates the global ring buffer. Returns 0, or -ENOMEM if that fails. |
| 519 | + */ |
| 520 | +static int init_stats(void) |
| 521 | +{ |
| 522 | + int ret = -ENOMEM; |
| 523 | + |
| 524 | + mutex_init(&data.lock); |
| 525 | + init_waitqueue_head(&data.wq); |
| 526 | + atomic_set(&data.sample_open, 0); |
| 527 | + |
| 528 | + ring_buffer = ring_buffer_alloc(buf_size, BUF_FLAGS); |
| 529 | + |
| 530 | + if (WARN(!ring_buffer, KERN_ERR BANNER |
| 531 | + "failed to allocate ring buffer!\n")) |
| 532 | + goto out; |
| 533 | + |
| 534 | + __reset_stats(); /* NOTE(review): called without data.lock here */ |
| 535 | + data.threshold = DEFAULT_LAT_THRESHOLD; /* threshold us */ |
| 536 | + data.sample_window = DEFAULT_SAMPLE_WINDOW; /* window us */ |
| 537 | + data.sample_width = DEFAULT_SAMPLE_WIDTH; /* width us */ |
| 538 | + |
| 539 | + ret = 0; |
| 540 | + |
| 541 | +out: |
| 542 | + return ret; |
| 543 | + |
| 544 | +} |
| 545 | + |
| 546 | +/* |
| 547 | + * simple_data_read - Wrapper read function for global state debugfs entries |
| 548 | + * @filp: The active open file structure for the debugfs "file" |
| 549 | + * @ubuf: The userspace provided buffer to read value into |
| 550 | + * @cnt: The maximum number of bytes to read |
| 551 | + * @ppos: The current "file" position |
| 552 | + * @entry: The entry to read from |
| 553 | + * |
| 554 | + * This function provides a generic read implementation for the global state |
| 555 | + * "data" structure debugfs filesystem entries. It would be nice to use |
| 556 | + * simple_attr_read directly, but we need to make sure that the data.lock |
| 557 | + * mutex is held during the actual read (even though we likely won't ever |
| 558 | + * actually race here as the updater runs under a stop_machine context). |
| 559 | + */ |
| 560 | +static ssize_t simple_data_read(struct file *filp, char __user *ubuf, |
| 561 | + size_t cnt, loff_t *ppos, const u64 *entry) |
| 562 | +{ |
| 563 | + char buf[U64STR_SIZE]; |
| 564 | + u64 val = 0; |
| 565 | + int len = 0; |
| 566 | + |
| 567 | + memset(buf, 0, sizeof(buf)); |
| 568 | + |
| 569 | + if (!entry) |
| 570 | + return -EFAULT; |
| 571 | + |
| 572 | + mutex_lock(&data.lock); |
| 573 | + val = *entry; /* snapshot taken under the mutex */ |
| 574 | + mutex_unlock(&data.lock); |
| 575 | + |
| 576 | + len = snprintf(buf, sizeof(buf), "%llu\n", (unsigned long long)val); |
| 577 | + |
| 578 | + return simple_read_from_buffer(ubuf, cnt, ppos, buf, len); |
| 579 | + |
| 580 | +} |
| 581 | + |
| 582 | +/* |
| 583 | + * simple_data_write - Wrapper write function for global state debugfs entries |
| 584 | + * @filp: The active open file structure for the debugfs "file" |
| 585 | + * @ubuf: The userspace provided buffer to write value from |
| 586 | + * @cnt: The maximum number of bytes to write |
| 587 | + * @ppos: The current "file" position |
| 588 | + * @entry: The entry to write to |
| 589 | + * |
| 590 | + * This function provides a generic write implementation for the global state |
| 591 | + * "data" structure debugfs filesystem entries. It would be nice to use |
| 592 | + * simple_attr_write directly, but we need to make sure that the data.lock |
| 593 | + * mutex is held during the actual write (even though we likely won't ever |
| 594 | + * actually race here as the updater runs under a stop_machine context). |
| 595 | + */ |
| 596 | +static ssize_t simple_data_write(struct file *filp, const char __user *ubuf, |
| 597 | + size_t cnt, loff_t *ppos, u64 *entry) |
| 598 | +{ |
| 599 | + char buf[U64STR_SIZE]; |
| 600 | + int csize = min(cnt, sizeof(buf)); /* never copy more than buf holds */ |
| 601 | + u64 val = 0; |
| 602 | + int err = 0; |
| 603 | + |
| 604 | + memset(buf, '\0', sizeof(buf)); |
| 605 | + if (copy_from_user(buf, ubuf, csize)) |
| 606 | + return -EFAULT; |
| 607 | + |
| 608 | + buf[U64STR_SIZE-1] = '\0'; /* just in case */ |
| 609 | + err = kstrtoull(buf, 10, &val); |
| 610 | + if (err) |
| 611 | + return -EINVAL; /* kstrtoull's own error code is not propagated */ |
| 612 | + |
| 613 | + mutex_lock(&data.lock); |
| 614 | + *entry = val; |
| 615 | + mutex_unlock(&data.lock); |
| 616 | + |
| 617 | + return csize; /* bytes consumed, at most sizeof(buf) */ |
| 618 | +} |
| 619 | + |
| 620 | +/** |
| 621 | + * debug_count_fopen - Open function for "count" debugfs entry |
| 622 | + * @inode: The in-kernel inode representation of the debugfs "file" |
| 623 | + * @filp: The active open file structure for the debugfs "file" |
| 624 | + * |
| 625 | + * This function provides an open implementation for the "count" debugfs |
| 626 | + * interface to the hardware latency detector. It always succeeds (returns 0). |
| 627 | + */ |
| 628 | +static int debug_count_fopen(struct inode *inode, struct file *filp) |
| 629 | +{ |
| 630 | + return 0; |
| 631 | +} |
| 632 | + |
| 633 | +/** |
| 634 | + * debug_count_fread - Read function for "count" debugfs entry |
| 635 | + * @filp: The active open file structure for the debugfs "file" |
| 636 | + * @ubuf: The userspace provided buffer to read value into |
| 637 | + * @cnt: The maximum number of bytes to read |
| 638 | + * @ppos: The current "file" position |
| 639 | + * |
| 640 | + * This function provides a read implementation for the "count" debugfs |
| 641 | + * interface to the hardware latency detector. It returns, via |
| 642 | + * simple_data_read(), the number of latency readings exceeding the threshold |
| 643 | + * since the detector was last reset (e.g. by writing a zero into "count"). |
| 644 | + */ |
| 645 | +static ssize_t debug_count_fread(struct file *filp, char __user *ubuf, |
| 646 | + size_t cnt, loff_t *ppos) |
| 647 | +{ |
| 648 | + return simple_data_read(filp, ubuf, cnt, ppos, &data.count); |
| 649 | +} |
| 650 | + |
| 651 | +/** |
| 652 | + * debug_count_fwrite - Write function for "count" debugfs entry |
| 653 | + * @filp: The active open file structure for the debugfs "file" |
| 654 | + * @ubuf: The user buffer that contains the value to write |
| 655 | + * @cnt: The maximum number of bytes to write to "file" |
| 656 | + * @ppos: The current position in the debugfs "file" |
| 657 | + * |
| 658 | + * This function provides a write implementation for the "count" debugfs |
| 659 | + * interface to the hardware latency detector. The decimal value written is |
| 660 | + * stored in data.count via simple_data_write(), e.g. to zero the total count. |
| 661 | + */ |
| 662 | +static ssize_t debug_count_fwrite(struct file *filp, |
| 663 | + const char __user *ubuf, |
| 664 | + size_t cnt, |
| 665 | + loff_t *ppos) |
| 666 | +{ |
| 667 | + return simple_data_write(filp, ubuf, cnt, ppos, &data.count); |
| 668 | +} |
| 669 | + |
| 670 | +/** |
| 671 | + * debug_enable_fopen - Dummy open function for "enable" debugfs interface |
| 672 | + * @inode: The in-kernel inode representation of the debugfs "file" |
| 673 | + * @filp: The active open file structure for the debugfs "file" |
| 674 | + * |
| 675 | + * This function provides an open implementation for the "enable" debugfs |
| 676 | + * interface to the hardware latency detector. It always succeeds (returns 0). |
| 677 | + */ |
| 678 | +static int debug_enable_fopen(struct inode *inode, struct file *filp) |
| 679 | +{ |
| 680 | + return 0; |
| 681 | +} |
| 682 | + |
| 683 | +/** |
| 684 | + * debug_enable_fread - Read function for "enable" debugfs interface |
| 685 | + * @filp: The active open file structure for the debugfs "file" |
| 686 | + * @ubuf: The userspace provided buffer to read value into |
| 687 | + * @cnt: The maximum number of bytes to read |
| 688 | + * @ppos: The current "file" position |
| 689 | + * |
| 690 | + * This function provides a read implementation for the "enable" debugfs |
| 691 | + * interface to the hardware latency detector. Can be used to determine |
| 692 | + * whether the detector is currently enabled ("0\n" or "1\n" returned). |
| 693 | + */ |
| 694 | +static ssize_t debug_enable_fread(struct file *filp, char __user *ubuf, |
| 695 | + size_t cnt, loff_t *ppos) |
| 696 | +{ |
| 697 | + char buf[4]; |
| 698 | + |
| 699 | + if ((cnt < sizeof(buf)) || (*ppos)) |
| 700 | + return 0; /* buffer under 4 bytes, or already read: report EOF */ |
| 701 | + |
| 702 | + buf[0] = enabled ? '1' : '0'; |
| 703 | + buf[1] = '\n'; |
| 704 | + buf[2] = '\0'; |
| 705 | + if (copy_to_user(ubuf, buf, strlen(buf))) |
| 706 | + return -EFAULT; |
| 707 | + return *ppos = strlen(buf); /* 2 bytes; non-zero *ppos ends later reads */ |
| 708 | +} |
| 709 | + |
| 710 | +/** |
| 711 | + * debug_enable_fwrite - Write function for "enable" debugfs interface |
| 712 | + * @filp: The active open file structure for the debugfs "file" |
| 713 | + * @ubuf: The user buffer that contains the value to write |
| 714 | + * @cnt: The maximum number of bytes to write to "file" |
| 715 | + * @ppos: The current position in the debugfs "file" |
| 716 | + * |
| 717 | + * This function provides a write implementation for the "enable" debugfs |
| 718 | + * interface to the hardware latency detector. Can be used to enable or |
| 719 | + * disable the detector, which will have the side-effect of possibly |
| 720 | + * also resetting the global stats and kicking off the measuring |
| 721 | + * kthread (on an enable) or the converse (upon a disable). |
| 722 | + */ |
| 723 | +static ssize_t debug_enable_fwrite(struct file *filp, |
| 724 | + const char __user *ubuf, |
| 725 | + size_t cnt, |
| 726 | + loff_t *ppos) |
| 727 | +{ |
| 728 | + char buf[4]; |
| 729 | + int csize = min(cnt, sizeof(buf)); |
| 730 | + long val = 0; |
| 731 | + int err = 0; |
| 732 | + |
| 733 | + memset(buf, '\0', sizeof(buf)); |
| 734 | + if (copy_from_user(buf, ubuf, csize)) |
| 735 | + return -EFAULT; |
| 736 | + |
| 737 | + buf[sizeof(buf)-1] = '\0'; /* just in case */ |
| 738 | + err = kstrtoul(buf, 10, &val); |
Allen Martin | fc468d8 | 2016-11-15 17:57:52 -0800 | [diff] [blame] | 739 | + if (0 != err) |
Allen Martin | 685e0f8 | 2016-07-26 19:34:29 -0700 | [diff] [blame] | 740 | + return -EINVAL; |
| 741 | + |
| 742 | + if (val) { |
| 743 | + if (enabled) |
| 744 | + goto unlock; |
| 745 | + enabled = 1; |
| 746 | + __reset_stats(); |
| 747 | + if (start_kthread()) |
| 748 | + return -EFAULT; |
| 749 | + } else { |
| 750 | + if (!enabled) |
| 751 | + goto unlock; |
| 752 | + enabled = 0; |
| 753 | + err = stop_kthread(); |
| 754 | + if (err) { |
| 755 | + pr_err(BANNER "cannot stop kthread\n"); |
| 756 | + return -EFAULT; |
| 757 | + } |
| 758 | + wake_up(&data.wq); /* reader(s) should return */ |
| 759 | + } |
| 760 | +unlock: |
| 761 | + return csize; |
| 762 | +} |
| 763 | + |
/**
 * debug_max_fopen - No-op open handler for the "max" debugfs entry
 * @inode: In-kernel inode of the debugfs "file" (unused)
 * @filp: Open file structure of the debugfs "file" (unused)
 *
 * Opening "max" needs no per-open state, so this handler only reports
 * success.
 */
static int debug_max_fopen(struct inode *inode, struct file *filp)
{
	/* nothing to set up */
	return 0;
}
| 776 | + |
| 777 | +/** |
| 778 | + * debug_max_fread - Read function for "max" debugfs entry |
| 779 | + * @filp: The active open file structure for the debugfs "file" |
| 780 | + * @ubuf: The userspace provided buffer to read value into |
| 781 | + * @cnt: The maximum number of bytes to read |
| 782 | + * @ppos: The current "file" position |
| 783 | + * |
| 784 | + * This function provides a read implementation for the "max" debugfs |
| 785 | + * interface to the hardware latency detector. Can be used to determine |
| 786 | + * the maximum latency value observed since it was last reset. |
| 787 | + */ |
| 788 | +static ssize_t debug_max_fread(struct file *filp, char __user *ubuf, |
| 789 | + size_t cnt, loff_t *ppos) |
| 790 | +{ |
| 791 | + return simple_data_read(filp, ubuf, cnt, ppos, &data.max_sample); |
| 792 | +} |
| 793 | + |
| 794 | +/** |
| 795 | + * debug_max_fwrite - Write function for "max" debugfs entry |
| 796 | + * @filp: The active open file structure for the debugfs "file" |
| 797 | + * @ubuf: The user buffer that contains the value to write |
| 798 | + * @cnt: The maximum number of bytes to write to "file" |
| 799 | + * @ppos: The current position in the debugfs "file" |
| 800 | + * |
| 801 | + * This function provides a write implementation for the "max" debugfs |
| 802 | + * interface to the hardware latency detector. Can be used to reset the |
| 803 | + * maximum or set it to some other desired value - if, then, subsequent |
| 804 | + * measurements exceed this value, the maximum will be updated. |
| 805 | + */ |
| 806 | +static ssize_t debug_max_fwrite(struct file *filp, |
| 807 | + const char __user *ubuf, |
| 808 | + size_t cnt, |
| 809 | + loff_t *ppos) |
| 810 | +{ |
| 811 | + return simple_data_write(filp, ubuf, cnt, ppos, &data.max_sample); |
| 812 | +} |
| 813 | + |
| 814 | + |
| 815 | +/** |
| 816 | + * debug_sample_fopen - An open function for "sample" debugfs interface |
| 817 | + * @inode: The in-kernel inode representation of this debugfs "file" |
| 818 | + * @filp: The active open file structure for the debugfs "file" |
| 819 | + * |
| 820 | + * This function handles opening the "sample" file within the hardware |
| 821 | + * latency detector debugfs directory interface. This file is used to read |
| 822 | + * raw samples from the global ring_buffer and allows the user to see a |
| 823 | + * running latency history. Can be opened blocking or non-blocking, |
| 824 | + * affecting whether it behaves as a buffer read pipe, or does not. |
| 825 | + * Implements simple locking to prevent multiple simultaneous use. |
| 826 | + */ |
| 827 | +static int debug_sample_fopen(struct inode *inode, struct file *filp) |
| 828 | +{ |
| 829 | + if (!atomic_add_unless(&data.sample_open, 1, 1)) |
| 830 | + return -EBUSY; |
| 831 | + else |
| 832 | + return 0; |
| 833 | +} |
| 834 | + |
| 835 | +/** |
| 836 | + * debug_sample_fread - A read function for "sample" debugfs interface |
| 837 | + * @filp: The active open file structure for the debugfs "file" |
| 838 | + * @ubuf: The user buffer that will contain the samples read |
| 839 | + * @cnt: The maximum bytes to read from the debugfs "file" |
| 840 | + * @ppos: The current position in the debugfs "file" |
| 841 | + * |
| 842 | + * This function handles reading from the "sample" file within the hardware |
| 843 | + * latency detector debugfs directory interface. This file is used to read |
| 844 | + * raw samples from the global ring_buffer and allows the user to see a |
| 845 | + * running latency history. By default this will block pending a new |
| 846 | + * value written into the sample buffer, unless there are already a |
| 847 | + * number of value(s) waiting in the buffer, or the sample file was |
| 848 | + * previously opened in a non-blocking mode of operation. |
| 849 | + */ |
| 850 | +static ssize_t debug_sample_fread(struct file *filp, char __user *ubuf, |
| 851 | + size_t cnt, loff_t *ppos) |
| 852 | +{ |
| 853 | + int len = 0; |
| 854 | + char buf[64]; |
| 855 | + struct sample *sample = NULL; |
| 856 | + |
| 857 | + if (!enabled) |
| 858 | + return 0; |
| 859 | + |
| 860 | + sample = kzalloc(sizeof(struct sample), GFP_KERNEL); |
| 861 | + if (!sample) |
| 862 | + return -ENOMEM; |
| 863 | + |
| 864 | + while (!buffer_get_sample(sample)) { |
| 865 | + |
| 866 | + DEFINE_WAIT(wait); |
| 867 | + |
| 868 | + if (filp->f_flags & O_NONBLOCK) { |
| 869 | + len = -EAGAIN; |
| 870 | + goto out; |
| 871 | + } |
| 872 | + |
| 873 | + prepare_to_wait(&data.wq, &wait, TASK_INTERRUPTIBLE); |
| 874 | + schedule(); |
| 875 | + finish_wait(&data.wq, &wait); |
| 876 | + |
| 877 | + if (signal_pending(current)) { |
| 878 | + len = -EINTR; |
| 879 | + goto out; |
| 880 | + } |
| 881 | + |
| 882 | + if (!enabled) { /* enable was toggled */ |
| 883 | + len = 0; |
| 884 | + goto out; |
| 885 | + } |
| 886 | + } |
| 887 | + |
| 888 | + len = snprintf(buf, sizeof(buf), "%010lu.%010lu\t%llu\n", |
| 889 | + sample->timestamp.tv_sec, |
| 890 | + sample->timestamp.tv_nsec, |
| 891 | + sample->duration); |
| 892 | + |
| 893 | + |
| 894 | + /* handling partial reads is more trouble than it's worth */ |
| 895 | + if (len > cnt) |
| 896 | + goto out; |
| 897 | + |
| 898 | + if (copy_to_user(ubuf, buf, len)) |
| 899 | + len = -EFAULT; |
| 900 | + |
| 901 | +out: |
| 902 | + kfree(sample); |
| 903 | + return len; |
| 904 | +} |
| 905 | + |
| 906 | +/** |
| 907 | + * debug_sample_release - Release function for "sample" debugfs interface |
| 908 | + * @inode: The in-kernel inode represenation of the debugfs "file" |
| 909 | + * @filp: The active open file structure for the debugfs "file" |
| 910 | + * |
| 911 | + * This function completes the close of the debugfs interface "sample" file. |
| 912 | + * Frees the sample_open "lock" so that other users may open the interface. |
| 913 | + */ |
| 914 | +static int debug_sample_release(struct inode *inode, struct file *filp) |
| 915 | +{ |
| 916 | + atomic_dec(&data.sample_open); |
| 917 | + |
| 918 | + return 0; |
| 919 | +} |
| 920 | + |
/**
 * debug_threshold_fopen - No-op open handler for the "threshold" debugfs entry
 * @inode: In-kernel inode of the debugfs "file" (unused)
 * @filp: Open file structure of the debugfs "file" (unused)
 *
 * Opening "threshold" needs no per-open state, so this handler only
 * reports success.
 */
static int debug_threshold_fopen(struct inode *inode, struct file *filp)
{
	/* nothing to set up */
	return 0;
}
| 933 | + |
| 934 | +/** |
| 935 | + * debug_threshold_fread - Read function for "threshold" debugfs entry |
| 936 | + * @filp: The active open file structure for the debugfs "file" |
| 937 | + * @ubuf: The userspace provided buffer to read value into |
| 938 | + * @cnt: The maximum number of bytes to read |
| 939 | + * @ppos: The current "file" position |
| 940 | + * |
| 941 | + * This function provides a read implementation for the "threshold" debugfs |
| 942 | + * interface to the hardware latency detector. It can be used to determine |
| 943 | + * the current threshold level at which a latency will be recorded in the |
| 944 | + * global ring buffer, typically on the order of 10us. |
| 945 | + */ |
| 946 | +static ssize_t debug_threshold_fread(struct file *filp, char __user *ubuf, |
| 947 | + size_t cnt, loff_t *ppos) |
| 948 | +{ |
| 949 | + return simple_data_read(filp, ubuf, cnt, ppos, &data.threshold); |
| 950 | +} |
| 951 | + |
| 952 | +/** |
| 953 | + * debug_threshold_fwrite - Write function for "threshold" debugfs entry |
| 954 | + * @filp: The active open file structure for the debugfs "file" |
| 955 | + * @ubuf: The user buffer that contains the value to write |
| 956 | + * @cnt: The maximum number of bytes to write to "file" |
| 957 | + * @ppos: The current position in the debugfs "file" |
| 958 | + * |
| 959 | + * This function provides a write implementation for the "threshold" debugfs |
| 960 | + * interface to the hardware latency detector. It can be used to configure |
| 961 | + * the threshold level at which any subsequently detected latencies will |
| 962 | + * be recorded into the global ring buffer. |
| 963 | + */ |
| 964 | +static ssize_t debug_threshold_fwrite(struct file *filp, |
| 965 | + const char __user *ubuf, |
| 966 | + size_t cnt, |
| 967 | + loff_t *ppos) |
| 968 | +{ |
| 969 | + int ret; |
| 970 | + |
| 971 | + ret = simple_data_write(filp, ubuf, cnt, ppos, &data.threshold); |
| 972 | + |
| 973 | + if (enabled) |
| 974 | + wake_up_process(kthread); |
| 975 | + |
| 976 | + return ret; |
| 977 | +} |
| 978 | + |
/**
 * debug_width_fopen - No-op open handler for the "width" debugfs entry
 * @inode: In-kernel inode of the debugfs "file" (unused)
 * @filp: Open file structure of the debugfs "file" (unused)
 *
 * Opening "width" needs no per-open state, so this handler only reports
 * success.
 */
static int debug_width_fopen(struct inode *inode, struct file *filp)
{
	/* nothing to set up */
	return 0;
}
| 991 | + |
| 992 | +/** |
| 993 | + * debug_width_fread - Read function for "width" debugfs entry |
| 994 | + * @filp: The active open file structure for the debugfs "file" |
| 995 | + * @ubuf: The userspace provided buffer to read value into |
| 996 | + * @cnt: The maximum number of bytes to read |
| 997 | + * @ppos: The current "file" position |
| 998 | + * |
| 999 | + * This function provides a read implementation for the "width" debugfs |
| 1000 | + * interface to the hardware latency detector. It can be used to determine |
| 1001 | + * for how many us of the total window us we will actively sample for any |
| 1002 | + * hardware-induced latecy periods. Obviously, it is not possible to |
| 1003 | + * sample constantly and have the system respond to a sample reader, or, |
| 1004 | + * worse, without having the system appear to have gone out to lunch. |
| 1005 | + */ |
| 1006 | +static ssize_t debug_width_fread(struct file *filp, char __user *ubuf, |
| 1007 | + size_t cnt, loff_t *ppos) |
| 1008 | +{ |
| 1009 | + return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_width); |
| 1010 | +} |
| 1011 | + |
| 1012 | +/** |
| 1013 | + * debug_width_fwrite - Write function for "width" debugfs entry |
| 1014 | + * @filp: The active open file structure for the debugfs "file" |
| 1015 | + * @ubuf: The user buffer that contains the value to write |
| 1016 | + * @cnt: The maximum number of bytes to write to "file" |
| 1017 | + * @ppos: The current position in the debugfs "file" |
| 1018 | + * |
| 1019 | + * This function provides a write implementation for the "width" debugfs |
| 1020 | + * interface to the hardware latency detector. It can be used to configure |
| 1021 | + * for how many us of the total window us we will actively sample for any |
| 1022 | + * hardware-induced latency periods. Obviously, it is not possible to |
| 1023 | + * sample constantly and have the system respond to a sample reader, or, |
| 1024 | + * worse, without having the system appear to have gone out to lunch. It |
| 1025 | + * is enforced that width is less that the total window size. |
| 1026 | + */ |
| 1027 | +static ssize_t debug_width_fwrite(struct file *filp, |
| 1028 | + const char __user *ubuf, |
| 1029 | + size_t cnt, |
| 1030 | + loff_t *ppos) |
| 1031 | +{ |
| 1032 | + char buf[U64STR_SIZE]; |
| 1033 | + int csize = min(cnt, sizeof(buf)); |
| 1034 | + u64 val = 0; |
| 1035 | + int err = 0; |
| 1036 | + |
| 1037 | + memset(buf, '\0', sizeof(buf)); |
| 1038 | + if (copy_from_user(buf, ubuf, csize)) |
| 1039 | + return -EFAULT; |
| 1040 | + |
| 1041 | + buf[U64STR_SIZE-1] = '\0'; /* just in case */ |
| 1042 | + err = kstrtoull(buf, 10, &val); |
Allen Martin | fc468d8 | 2016-11-15 17:57:52 -0800 | [diff] [blame] | 1043 | + if (0 != err) |
Allen Martin | 685e0f8 | 2016-07-26 19:34:29 -0700 | [diff] [blame] | 1044 | + return -EINVAL; |
| 1045 | + |
| 1046 | + mutex_lock(&data.lock); |
| 1047 | + if (val < data.sample_window) |
| 1048 | + data.sample_width = val; |
| 1049 | + else { |
| 1050 | + mutex_unlock(&data.lock); |
| 1051 | + return -EINVAL; |
| 1052 | + } |
| 1053 | + mutex_unlock(&data.lock); |
| 1054 | + |
| 1055 | + if (enabled) |
| 1056 | + wake_up_process(kthread); |
| 1057 | + |
| 1058 | + return csize; |
| 1059 | +} |
| 1060 | + |
/**
 * debug_window_fopen - No-op open handler for the "window" debugfs entry
 * @inode: In-kernel inode of the debugfs "file" (unused)
 * @filp: Open file structure of the debugfs "file" (unused)
 *
 * The window is the total time in us that is considered one sample
 * period. Conceptually, windows occur back-to-back and each contains a
 * sample width period during which actual sampling occurs. Opening the
 * entry needs no per-open state, so this handler only reports success.
 */
static int debug_window_fopen(struct inode *inode, struct file *filp)
{
	/* nothing to set up */
	return 0;
}
| 1076 | + |
| 1077 | +/** |
| 1078 | + * debug_window_fread - Read function for "window" debugfs entry |
| 1079 | + * @filp: The active open file structure for the debugfs "file" |
| 1080 | + * @ubuf: The userspace provided buffer to read value into |
| 1081 | + * @cnt: The maximum number of bytes to read |
| 1082 | + * @ppos: The current "file" position |
| 1083 | + * |
| 1084 | + * This function provides a read implementation for the "window" debugfs |
| 1085 | + * interface to the hardware latency detector. The window is the total time |
| 1086 | + * in us that will be considered one sample period. Conceptually, windows |
| 1087 | + * occur back-to-back and contain a sample width period during which |
| 1088 | + * actual sampling occurs. Can be used to read the total window size. |
| 1089 | + */ |
| 1090 | +static ssize_t debug_window_fread(struct file *filp, char __user *ubuf, |
| 1091 | + size_t cnt, loff_t *ppos) |
| 1092 | +{ |
| 1093 | + return simple_data_read(filp, ubuf, cnt, ppos, &data.sample_window); |
| 1094 | +} |
| 1095 | + |
| 1096 | +/** |
| 1097 | + * debug_window_fwrite - Write function for "window" debugfs entry |
| 1098 | + * @filp: The active open file structure for the debugfs "file" |
| 1099 | + * @ubuf: The user buffer that contains the value to write |
| 1100 | + * @cnt: The maximum number of bytes to write to "file" |
| 1101 | + * @ppos: The current position in the debugfs "file" |
| 1102 | + * |
| 1103 | + * This function provides a write implementation for the "window" debufds |
| 1104 | + * interface to the hardware latency detetector. The window is the total time |
| 1105 | + * in us that will be considered one sample period. Conceptually, windows |
| 1106 | + * occur back-to-back and contain a sample width period during which |
| 1107 | + * actual sampling occurs. Can be used to write a new total window size. It |
| 1108 | + * is enfoced that any value written must be greater than the sample width |
| 1109 | + * size, or an error results. |
| 1110 | + */ |
| 1111 | +static ssize_t debug_window_fwrite(struct file *filp, |
| 1112 | + const char __user *ubuf, |
| 1113 | + size_t cnt, |
| 1114 | + loff_t *ppos) |
| 1115 | +{ |
| 1116 | + char buf[U64STR_SIZE]; |
| 1117 | + int csize = min(cnt, sizeof(buf)); |
| 1118 | + u64 val = 0; |
| 1119 | + int err = 0; |
| 1120 | + |
| 1121 | + memset(buf, '\0', sizeof(buf)); |
| 1122 | + if (copy_from_user(buf, ubuf, csize)) |
| 1123 | + return -EFAULT; |
| 1124 | + |
| 1125 | + buf[U64STR_SIZE-1] = '\0'; /* just in case */ |
| 1126 | + err = kstrtoull(buf, 10, &val); |
Allen Martin | fc468d8 | 2016-11-15 17:57:52 -0800 | [diff] [blame] | 1127 | + if (0 != err) |
Allen Martin | 685e0f8 | 2016-07-26 19:34:29 -0700 | [diff] [blame] | 1128 | + return -EINVAL; |
| 1129 | + |
| 1130 | + mutex_lock(&data.lock); |
| 1131 | + if (data.sample_width < val) |
| 1132 | + data.sample_window = val; |
| 1133 | + else { |
| 1134 | + mutex_unlock(&data.lock); |
| 1135 | + return -EINVAL; |
| 1136 | + } |
| 1137 | + mutex_unlock(&data.lock); |
| 1138 | + |
| 1139 | + return csize; |
| 1140 | +} |
| 1141 | + |
| 1142 | +/* |
| 1143 | + * Function pointers for the "count" debugfs file operations |
| 1144 | + */ |
| 1145 | +static const struct file_operations count_fops = { |
| 1146 | + .open = debug_count_fopen, |
| 1147 | + .read = debug_count_fread, |
| 1148 | + .write = debug_count_fwrite, |
| 1149 | + .owner = THIS_MODULE, |
| 1150 | +}; |
| 1151 | + |
| 1152 | +/* |
| 1153 | + * Function pointers for the "enable" debugfs file operations |
| 1154 | + */ |
| 1155 | +static const struct file_operations enable_fops = { |
| 1156 | + .open = debug_enable_fopen, |
| 1157 | + .read = debug_enable_fread, |
| 1158 | + .write = debug_enable_fwrite, |
| 1159 | + .owner = THIS_MODULE, |
| 1160 | +}; |
| 1161 | + |
| 1162 | +/* |
| 1163 | + * Function pointers for the "max" debugfs file operations |
| 1164 | + */ |
| 1165 | +static const struct file_operations max_fops = { |
| 1166 | + .open = debug_max_fopen, |
| 1167 | + .read = debug_max_fread, |
| 1168 | + .write = debug_max_fwrite, |
| 1169 | + .owner = THIS_MODULE, |
| 1170 | +}; |
| 1171 | + |
| 1172 | +/* |
| 1173 | + * Function pointers for the "sample" debugfs file operations |
| 1174 | + */ |
| 1175 | +static const struct file_operations sample_fops = { |
| 1176 | + .open = debug_sample_fopen, |
| 1177 | + .read = debug_sample_fread, |
| 1178 | + .release = debug_sample_release, |
| 1179 | + .owner = THIS_MODULE, |
| 1180 | +}; |
| 1181 | + |
| 1182 | +/* |
| 1183 | + * Function pointers for the "threshold" debugfs file operations |
| 1184 | + */ |
| 1185 | +static const struct file_operations threshold_fops = { |
| 1186 | + .open = debug_threshold_fopen, |
| 1187 | + .read = debug_threshold_fread, |
| 1188 | + .write = debug_threshold_fwrite, |
| 1189 | + .owner = THIS_MODULE, |
| 1190 | +}; |
| 1191 | + |
| 1192 | +/* |
| 1193 | + * Function pointers for the "width" debugfs file operations |
| 1194 | + */ |
| 1195 | +static const struct file_operations width_fops = { |
| 1196 | + .open = debug_width_fopen, |
| 1197 | + .read = debug_width_fread, |
| 1198 | + .write = debug_width_fwrite, |
| 1199 | + .owner = THIS_MODULE, |
| 1200 | +}; |
| 1201 | + |
| 1202 | +/* |
| 1203 | + * Function pointers for the "window" debugfs file operations |
| 1204 | + */ |
| 1205 | +static const struct file_operations window_fops = { |
| 1206 | + .open = debug_window_fopen, |
| 1207 | + .read = debug_window_fread, |
| 1208 | + .write = debug_window_fwrite, |
| 1209 | + .owner = THIS_MODULE, |
| 1210 | +}; |
| 1211 | + |
| 1212 | +/** |
| 1213 | + * init_debugfs - A function to initialize the debugfs interface files |
| 1214 | + * |
| 1215 | + * This function creates entries in debugfs for "hwlat_detector", including |
| 1216 | + * files to read values from the detector, current samples, and the |
| 1217 | + * maximum sample that has been captured since the hardware latency |
| 1218 | + * dectector was started. |
| 1219 | + */ |
| 1220 | +static int init_debugfs(void) |
| 1221 | +{ |
| 1222 | + int ret = -ENOMEM; |
| 1223 | + |
| 1224 | + debug_dir = debugfs_create_dir(DRVNAME, NULL); |
| 1225 | + if (!debug_dir) |
| 1226 | + goto err_debug_dir; |
| 1227 | + |
| 1228 | + debug_sample = debugfs_create_file("sample", 0444, |
| 1229 | + debug_dir, NULL, |
| 1230 | + &sample_fops); |
| 1231 | + if (!debug_sample) |
| 1232 | + goto err_sample; |
| 1233 | + |
| 1234 | + debug_count = debugfs_create_file("count", 0444, |
| 1235 | + debug_dir, NULL, |
| 1236 | + &count_fops); |
| 1237 | + if (!debug_count) |
| 1238 | + goto err_count; |
| 1239 | + |
| 1240 | + debug_max = debugfs_create_file("max", 0444, |
| 1241 | + debug_dir, NULL, |
| 1242 | + &max_fops); |
| 1243 | + if (!debug_max) |
| 1244 | + goto err_max; |
| 1245 | + |
| 1246 | + debug_sample_window = debugfs_create_file("window", 0644, |
| 1247 | + debug_dir, NULL, |
| 1248 | + &window_fops); |
| 1249 | + if (!debug_sample_window) |
| 1250 | + goto err_window; |
| 1251 | + |
| 1252 | + debug_sample_width = debugfs_create_file("width", 0644, |
| 1253 | + debug_dir, NULL, |
| 1254 | + &width_fops); |
| 1255 | + if (!debug_sample_width) |
| 1256 | + goto err_width; |
| 1257 | + |
| 1258 | + debug_threshold = debugfs_create_file("threshold", 0644, |
| 1259 | + debug_dir, NULL, |
| 1260 | + &threshold_fops); |
| 1261 | + if (!debug_threshold) |
| 1262 | + goto err_threshold; |
| 1263 | + |
| 1264 | + debug_enable = debugfs_create_file("enable", 0644, |
| 1265 | + debug_dir, &enabled, |
| 1266 | + &enable_fops); |
| 1267 | + if (!debug_enable) |
| 1268 | + goto err_enable; |
| 1269 | + |
| 1270 | + else { |
| 1271 | + ret = 0; |
| 1272 | + goto out; |
| 1273 | + } |
| 1274 | + |
| 1275 | +err_enable: |
| 1276 | + debugfs_remove(debug_threshold); |
| 1277 | +err_threshold: |
| 1278 | + debugfs_remove(debug_sample_width); |
| 1279 | +err_width: |
| 1280 | + debugfs_remove(debug_sample_window); |
| 1281 | +err_window: |
| 1282 | + debugfs_remove(debug_max); |
| 1283 | +err_max: |
| 1284 | + debugfs_remove(debug_count); |
| 1285 | +err_count: |
| 1286 | + debugfs_remove(debug_sample); |
| 1287 | +err_sample: |
| 1288 | + debugfs_remove(debug_dir); |
| 1289 | +err_debug_dir: |
| 1290 | +out: |
| 1291 | + return ret; |
| 1292 | +} |
| 1293 | + |
| 1294 | +/** |
| 1295 | + * free_debugfs - A function to cleanup the debugfs file interface |
| 1296 | + */ |
| 1297 | +static void free_debugfs(void) |
| 1298 | +{ |
| 1299 | + /* could also use a debugfs_remove_recursive */ |
| 1300 | + debugfs_remove(debug_enable); |
| 1301 | + debugfs_remove(debug_threshold); |
| 1302 | + debugfs_remove(debug_sample_width); |
| 1303 | + debugfs_remove(debug_sample_window); |
| 1304 | + debugfs_remove(debug_max); |
| 1305 | + debugfs_remove(debug_count); |
| 1306 | + debugfs_remove(debug_sample); |
| 1307 | + debugfs_remove(debug_dir); |
| 1308 | +} |
| 1309 | + |
| 1310 | +/** |
| 1311 | + * detector_init - Standard module initialization code |
| 1312 | + */ |
| 1313 | +static int detector_init(void) |
| 1314 | +{ |
| 1315 | + int ret = -ENOMEM; |
| 1316 | + |
| 1317 | + pr_info(BANNER "version %s\n", VERSION); |
| 1318 | + |
| 1319 | + ret = init_stats(); |
Allen Martin | fc468d8 | 2016-11-15 17:57:52 -0800 | [diff] [blame] | 1320 | + if (0 != ret) |
Allen Martin | 685e0f8 | 2016-07-26 19:34:29 -0700 | [diff] [blame] | 1321 | + goto out; |
| 1322 | + |
| 1323 | + ret = init_debugfs(); |
Allen Martin | fc468d8 | 2016-11-15 17:57:52 -0800 | [diff] [blame] | 1324 | + if (0 != ret) |
Allen Martin | 685e0f8 | 2016-07-26 19:34:29 -0700 | [diff] [blame] | 1325 | + goto err_stats; |
| 1326 | + |
| 1327 | + if (enabled) |
| 1328 | + ret = start_kthread(); |
| 1329 | + |
| 1330 | + goto out; |
| 1331 | + |
| 1332 | +err_stats: |
| 1333 | + ring_buffer_free(ring_buffer); |
| 1334 | +out: |
| 1335 | + return ret; |
| 1336 | + |
| 1337 | +} |
| 1338 | + |
| 1339 | +/** |
| 1340 | + * detector_exit - Standard module cleanup code |
| 1341 | + */ |
| 1342 | +static void detector_exit(void) |
| 1343 | +{ |
| 1344 | + int err; |
| 1345 | + |
| 1346 | + if (enabled) { |
| 1347 | + enabled = 0; |
| 1348 | + err = stop_kthread(); |
| 1349 | + if (err) |
| 1350 | + pr_err(BANNER "cannot stop kthread\n"); |
| 1351 | + } |
| 1352 | + |
| 1353 | + free_debugfs(); |
| 1354 | + ring_buffer_free(ring_buffer); /* free up the ring buffer */ |
| 1355 | + |
| 1356 | +} |
| 1357 | + |
/* Register detector_init()/detector_exit() as the module's load and unload hooks. */
module_init(detector_init);
module_exit(detector_exit);
| 1360 | -- |
Arvind M | 10268e7 | 2017-12-04 22:18:06 -0800 | [diff] [blame] | 1361 | 1.9.1 |
Allen Martin | 685e0f8 | 2016-07-26 19:34:29 -0700 | [diff] [blame] | 1362 | |