getdelays.c: fix overrun
[linux-2.6.git] / Documentation / accounting / getdelays.c
1 /* getdelays.c
2  *
3  * Utility to get per-pid and per-tgid delay accounting statistics
4  * Also illustrates usage of the taskstats interface
5  *
6  * Copyright (C) Shailabh Nagar, IBM Corp. 2005
7  * Copyright (C) Balbir Singh, IBM Corp. 2006
8  * Copyright (c) Jay Lan, SGI. 2006
9  *
10  * Compile with
11  *      gcc -I/usr/src/linux/include getdelays.c -o getdelays
12  */
13
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <errno.h>
17 #include <unistd.h>
18 #include <poll.h>
19 #include <string.h>
20 #include <fcntl.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 #include <sys/socket.h>
24 #include <sys/types.h>
25 #include <signal.h>
26
27 #include <linux/genetlink.h>
28 #include <linux/taskstats.h>
29
/*
 * Generic macros for dealing with netlink sockets. Might be duplicated
 * elsewhere. It is recommended that commercial grade applications use
 * libnl or libnetlink and use the interfaces provided by the library
 *
 * GENLMSG_DATA(glh)    - address GENL_HDRLEN bytes past NLMSG_DATA(glh),
 *                        i.e. the first byte after the genetlink header
 * GENLMSG_PAYLOAD(glh) - NLMSG_PAYLOAD(glh, 0) minus the genetlink header
 * NLA_DATA(na)         - address NLA_HDRLEN bytes past attribute 'na'
 * NLA_PAYLOAD(len)     - 'len' (an attribute's nla_len) minus NLA_HDRLEN
 *
 * Every macro argument is parenthesized and pointer arithmetic is done
 * on char * rather than on void *.
 */
#define GENLMSG_DATA(glh)       ((void *)((char *)NLMSG_DATA(glh) + GENL_HDRLEN))
#define GENLMSG_PAYLOAD(glh)    (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN)
#define NLA_DATA(na)            ((void *)((char *)(na) + NLA_HDRLEN))
#define NLA_PAYLOAD(len)        ((len) - NLA_HDRLEN)
39
/*
 * err(code, fmt, ...): print a printf-style message on stderr, then
 * terminate the process with exit status 'code'.
 */
#define err(code, fmt, ...)                             \
        do {                                            \
                fprintf(stderr, fmt, ##__VA_ARGS__);    \
                exit(code);                             \
        } while (0)
45
/* Reset to 0 by main() before it walks a nested AGGR attribute */
int done;
/* -r: SO_RCVBUF size applied in create_nl_socket(); 0 leaves it untouched */
int rcvbufsz;
/* Scratch buffer for the genetlink family name sent by get_family_id() */
char name[100];
/* -v: non-zero enables PRINTF() output */
int dbg;
/* -d: print delay accounting statistics */
int print_delays;
/* -i: print I/O accounting statistics */
int print_io_accounting;
__u64 stime;
__u64 utime;
53
/*
 * PRINTF(fmt, ...): debug printf; produces output only when 'dbg' is
 * non-zero. The do { } while (0) wrapper makes the expansion a single
 * statement, so "if (x) PRINTF(...); else ..." parses as intended.
 */
#define PRINTF(fmt, arg...)                     \
        do {                                    \
                if (dbg) {                      \
                        printf(fmt, ##arg);     \
                }                               \
        } while (0)
59
/* Upper bound, in bytes, on the payload of a message sent or received */
#define MAX_MSG_SIZE    1024
/* Upper bound on the number of cpus expected to be named in a cpumask */
#define MAX_CPUS        32

/* A generic netlink message: both headers followed by the payload bytes */
struct msgtemplate {
        struct nlmsghdr n;              /* netlink header */
        struct genlmsghdr g;            /* generic netlink header */
        char buf[MAX_MSG_SIZE];         /* attribute data */
};

/* cpumask string supplied with -m */
char cpumask[100 + 6 * MAX_CPUS];
72
/* Print the command synopsis and the flag descriptions on stderr */
static void usage(void)
{
        static const char help[] =
                "getdelays [-dilv] [-w logfile] [-r bufsize] "
                "[-m cpumask] [-t tgid] [-p pid]\n"
                "  -d: print delayacct stats\n"
                "  -i: print IO accounting (works only with -p)\n"
                "  -l: listen forever\n"
                "  -v: debug on\n";

        fputs(help, stderr);
}
82
83 /*
84  * Create a raw netlink socket and bind
85  */
86 static int create_nl_socket(int protocol)
87 {
88         int fd;
89         struct sockaddr_nl local;
90
91         fd = socket(AF_NETLINK, SOCK_RAW, protocol);
92         if (fd < 0)
93                 return -1;
94
95         if (rcvbufsz)
96                 if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF,
97                                 &rcvbufsz, sizeof(rcvbufsz)) < 0) {
98                         fprintf(stderr, "Unable to set socket rcv buf size "
99                                         "to %d\n",
100                                 rcvbufsz);
101                         return -1;
102                 }
103
104         memset(&local, 0, sizeof(local));
105         local.nl_family = AF_NETLINK;
106
107         if (bind(fd, (struct sockaddr *) &local, sizeof(local)) < 0)
108                 goto error;
109
110         return fd;
111 error:
112         close(fd);
113         return -1;
114 }
115
116
117 int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
118              __u8 genl_cmd, __u16 nla_type,
119              void *nla_data, int nla_len)
120 {
121         struct nlattr *na;
122         struct sockaddr_nl nladdr;
123         int r, buflen;
124         char *buf;
125
126         struct msgtemplate msg;
127
128         msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
129         msg.n.nlmsg_type = nlmsg_type;
130         msg.n.nlmsg_flags = NLM_F_REQUEST;
131         msg.n.nlmsg_seq = 0;
132         msg.n.nlmsg_pid = nlmsg_pid;
133         msg.g.cmd = genl_cmd;
134         msg.g.version = 0x1;
135         na = (struct nlattr *) GENLMSG_DATA(&msg);
136         na->nla_type = nla_type;
137         na->nla_len = nla_len + 1 + NLA_HDRLEN;
138         memcpy(NLA_DATA(na), nla_data, nla_len);
139         msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);
140
141         buf = (char *) &msg;
142         buflen = msg.n.nlmsg_len ;
143         memset(&nladdr, 0, sizeof(nladdr));
144         nladdr.nl_family = AF_NETLINK;
145         while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr,
146                            sizeof(nladdr))) < buflen) {
147                 if (r > 0) {
148                         buf += r;
149                         buflen -= r;
150                 } else if (errno != EAGAIN)
151                         return -1;
152         }
153         return 0;
154 }
155
156
157 /*
158  * Probe the controller in genetlink to find the family id
159  * for the TASKSTATS family
160  */
161 int get_family_id(int sd)
162 {
163         struct {
164                 struct nlmsghdr n;
165                 struct genlmsghdr g;
166                 char buf[256];
167         } ans;
168
169         int id, rc;
170         struct nlattr *na;
171         int rep_len;
172
173         strcpy(name, TASKSTATS_GENL_NAME);
174         rc = send_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY,
175                         CTRL_ATTR_FAMILY_NAME, (void *)name,
176                         strlen(TASKSTATS_GENL_NAME)+1);
177
178         rep_len = recv(sd, &ans, sizeof(ans), 0);
179         if (ans.n.nlmsg_type == NLMSG_ERROR ||
180             (rep_len < 0) || !NLMSG_OK((&ans.n), rep_len))
181                 return 0;
182
183         na = (struct nlattr *) GENLMSG_DATA(&ans);
184         na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len));
185         if (na->nla_type == CTRL_ATTR_FAMILY_ID) {
186                 id = *(__u16 *) NLA_DATA(na);
187         }
188         return id;
189 }
190
/*
 * Print the CPU, IO and MEM (swapin) delay accounting counters of 't'
 * on stdout as a small table.
 *
 * The __u64 counters are cast to unsigned long long so that they match
 * the %llu conversions on every architecture, as print_ioacct() does.
 */
void print_delayacct(struct taskstats *t)
{
        printf("\n\nCPU   %15s%15s%15s%15s\n"
               "      %15llu%15llu%15llu%15llu\n"
               "IO    %15s%15s\n"
               "      %15llu%15llu\n"
               "MEM   %15s%15s\n"
               "      %15llu%15llu\n\n",
               "count", "real total", "virtual total", "delay total",
               (unsigned long long)t->cpu_count,
               (unsigned long long)t->cpu_run_real_total,
               (unsigned long long)t->cpu_run_virtual_total,
               (unsigned long long)t->cpu_delay_total,
               "count", "delay total",
               (unsigned long long)t->blkio_count,
               (unsigned long long)t->blkio_delay_total,
               "count", "delay total",
               (unsigned long long)t->swapin_count,
               (unsigned long long)t->swapin_delay_total);
}
206
/*
 * Print the command name of 't' followed by its read, write and
 * cancelled-write byte counters on one line of stdout.
 */
void print_ioacct(struct taskstats *t)
{
        unsigned long long bytes_read = t->read_bytes;
        unsigned long long bytes_written = t->write_bytes;
        unsigned long long bytes_cancelled = t->cancelled_write_bytes;

        printf("%s: read=%llu, write=%llu, cancelled_write=%llu\n",
               t->ac_comm, bytes_read, bytes_written, bytes_cancelled);
}
215
216 int main(int argc, char *argv[])
217 {
218         int c, rc, rep_len, aggr_len, len2, cmd_type;
219         __u16 id;
220         __u32 mypid;
221
222         struct nlattr *na;
223         int nl_sd = -1;
224         int len = 0;
225         pid_t tid = 0;
226         pid_t rtid = 0;
227
228         int fd = 0;
229         int count = 0;
230         int write_file = 0;
231         int maskset = 0;
232         char *logfile = NULL;
233         int loop = 0;
234
235         struct msgtemplate msg;
236
237         while (1) {
238                 c = getopt(argc, argv, "diw:r:m:t:p:vl");
239                 if (c < 0)
240                         break;
241
242                 switch (c) {
243                 case 'd':
244                         printf("print delayacct stats ON\n");
245                         print_delays = 1;
246                         break;
247                 case 'i':
248                         printf("printing IO accounting\n");
249                         print_io_accounting = 1;
250                         break;
251                 case 'w':
252                         logfile = strdup(optarg);
253                         printf("write to file %s\n", logfile);
254                         write_file = 1;
255                         break;
256                 case 'r':
257                         rcvbufsz = atoi(optarg);
258                         printf("receive buf size %d\n", rcvbufsz);
259                         if (rcvbufsz < 0)
260                                 err(1, "Invalid rcv buf size\n");
261                         break;
262                 case 'm':
263                         strncpy(cpumask, optarg, sizeof(cpumask));
264                         maskset = 1;
265                         printf("cpumask %s maskset %d\n", cpumask, maskset);
266                         break;
267                 case 't':
268                         tid = atoi(optarg);
269                         if (!tid)
270                                 err(1, "Invalid tgid\n");
271                         cmd_type = TASKSTATS_CMD_ATTR_TGID;
272                         break;
273                 case 'p':
274                         tid = atoi(optarg);
275                         if (!tid)
276                                 err(1, "Invalid pid\n");
277                         cmd_type = TASKSTATS_CMD_ATTR_PID;
278                         break;
279                 case 'v':
280                         printf("debug on\n");
281                         dbg = 1;
282                         break;
283                 case 'l':
284                         printf("listen forever\n");
285                         loop = 1;
286                         break;
287                 default:
288                         usage();
289                         exit(-1);
290                 }
291         }
292
293         if (write_file) {
294                 fd = open(logfile, O_WRONLY | O_CREAT | O_TRUNC,
295                           S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
296                 if (fd == -1) {
297                         perror("Cannot open output file\n");
298                         exit(1);
299                 }
300         }
301
302         if ((nl_sd = create_nl_socket(NETLINK_GENERIC)) < 0)
303                 err(1, "error creating Netlink socket\n");
304
305
306         mypid = getpid();
307         id = get_family_id(nl_sd);
308         if (!id) {
309                 fprintf(stderr, "Error getting family id, errno %d\n", errno);
310                 goto err;
311         }
312         PRINTF("family id %d\n", id);
313
314         if (maskset) {
315                 rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
316                               TASKSTATS_CMD_ATTR_REGISTER_CPUMASK,
317                               &cpumask, strlen(cpumask) + 1);
318                 PRINTF("Sent register cpumask, retval %d\n", rc);
319                 if (rc < 0) {
320                         fprintf(stderr, "error sending register cpumask\n");
321                         goto err;
322                 }
323         }
324
325         if (tid) {
326                 rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
327                               cmd_type, &tid, sizeof(__u32));
328                 PRINTF("Sent pid/tgid, retval %d\n", rc);
329                 if (rc < 0) {
330                         fprintf(stderr, "error sending tid/tgid cmd\n");
331                         goto done;
332                 }
333         }
334
335         do {
336                 int i;
337
338                 rep_len = recv(nl_sd, &msg, sizeof(msg), 0);
339                 PRINTF("received %d bytes\n", rep_len);
340
341                 if (rep_len < 0) {
342                         fprintf(stderr, "nonfatal reply error: errno %d\n",
343                                 errno);
344                         continue;
345                 }
346                 if (msg.n.nlmsg_type == NLMSG_ERROR ||
347                     !NLMSG_OK((&msg.n), rep_len)) {
348                         struct nlmsgerr *err = NLMSG_DATA(&msg);
349                         fprintf(stderr, "fatal reply error,  errno %d\n",
350                                 err->error);
351                         goto done;
352                 }
353
354                 PRINTF("nlmsghdr size=%d, nlmsg_len=%d, rep_len=%d\n",
355                        sizeof(struct nlmsghdr), msg.n.nlmsg_len, rep_len);
356
357
358                 rep_len = GENLMSG_PAYLOAD(&msg.n);
359
360                 na = (struct nlattr *) GENLMSG_DATA(&msg);
361                 len = 0;
362                 i = 0;
363                 while (len < rep_len) {
364                         len += NLA_ALIGN(na->nla_len);
365                         switch (na->nla_type) {
366                         case TASKSTATS_TYPE_AGGR_TGID:
367                                 /* Fall through */
368                         case TASKSTATS_TYPE_AGGR_PID:
369                                 aggr_len = NLA_PAYLOAD(na->nla_len);
370                                 len2 = 0;
371                                 /* For nested attributes, na follows */
372                                 na = (struct nlattr *) NLA_DATA(na);
373                                 done = 0;
374                                 while (len2 < aggr_len) {
375                                         switch (na->nla_type) {
376                                         case TASKSTATS_TYPE_PID:
377                                                 rtid = *(int *) NLA_DATA(na);
378                                                 if (print_delays)
379                                                         printf("PID\t%d\n", rtid);
380                                                 break;
381                                         case TASKSTATS_TYPE_TGID:
382                                                 rtid = *(int *) NLA_DATA(na);
383                                                 if (print_delays)
384                                                         printf("TGID\t%d\n", rtid);
385                                                 break;
386                                         case TASKSTATS_TYPE_STATS:
387                                                 count++;
388                                                 if (print_delays)
389                                                         print_delayacct((struct taskstats *) NLA_DATA(na));
390                                                 if (print_io_accounting)
391                                                         print_ioacct((struct taskstats *) NLA_DATA(na));
392                                                 if (fd) {
393                                                         if (write(fd, NLA_DATA(na), na->nla_len) < 0) {
394                                                                 err(1,"write error\n");
395                                                         }
396                                                 }
397                                                 if (!loop)
398                                                         goto done;
399                                                 break;
400                                         default:
401                                                 fprintf(stderr, "Unknown nested"
402                                                         " nla_type %d\n",
403                                                         na->nla_type);
404                                                 break;
405                                         }
406                                         len2 += NLA_ALIGN(na->nla_len);
407                                         na = (struct nlattr *) ((char *) na + len2);
408                                 }
409                                 break;
410
411                         default:
412                                 fprintf(stderr, "Unknown nla_type %d\n",
413                                         na->nla_type);
414                                 break;
415                         }
416                         na = (struct nlattr *) (GENLMSG_DATA(&msg) + len);
417                 }
418         } while (loop);
419 done:
420         if (maskset) {
421                 rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
422                               TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK,
423                               &cpumask, strlen(cpumask) + 1);
424                 printf("Sent deregister mask, retval %d\n", rc);
425                 if (rc < 0)
426                         err(rc, "error sending deregister cpumask\n");
427         }
428 err:
429         close(nl_sd);
430         if (fd)
431                 close(fd);
432         return 0;
433 }