memory hotremove: unset migrate type "ISOLATE" after removal
[linux-2.6.git] / Documentation / accounting / getdelays.c
1 /* getdelays.c
2  *
3  * Utility to get per-pid and per-tgid delay accounting statistics
4  * Also illustrates usage of the taskstats interface
5  *
6  * Copyright (C) Shailabh Nagar, IBM Corp. 2005
7  * Copyright (C) Balbir Singh, IBM Corp. 2006
8  * Copyright (c) Jay Lan, SGI. 2006
9  *
10  * Compile with
11  *      gcc -I/usr/src/linux/include getdelays.c -o getdelays
12  */
13
14 #include <stdio.h>
15 #include <stdlib.h>
16 #include <errno.h>
17 #include <unistd.h>
18 #include <poll.h>
19 #include <string.h>
20 #include <fcntl.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 #include <sys/socket.h>
24 #include <signal.h>
25
26 #include <linux/genetlink.h>
27 #include <linux/taskstats.h>
28
/*
 * Generic macros for dealing with netlink sockets. Might be duplicated
 * elsewhere. It is recommended that commercial grade applications use
 * libnl or libnetlink and use the interfaces provided by the library
 *
 * GENLMSG_DATA(glh)    - start of the payload that follows the generic
 *                        netlink header of the message at glh
 * GENLMSG_PAYLOAD(glh) - payload length of glh minus the generic netlink
 *                        header
 * NLA_DATA(na)         - start of the payload of attribute na
 * NLA_PAYLOAD(len)     - payload length of an attribute whose nla_len is len
 *
 * Pointer arithmetic is done on char * (not void *) and every macro
 * argument is parenthesized so that the macros expand correctly for
 * arbitrary argument expressions.
 */
#define GENLMSG_DATA(glh)       ((void *)((char *)NLMSG_DATA(glh) + GENL_HDRLEN))
#define GENLMSG_PAYLOAD(glh)    (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN)
#define NLA_DATA(na)            ((void *)((char *)(na) + NLA_HDRLEN))
#define NLA_PAYLOAD(len)        ((len) - NLA_HDRLEN)

/* Print a formatted message to stderr and terminate with exit status code. */
#define err(code, fmt, arg...)                  \
        do {                                    \
                fprintf(stderr, fmt, ##arg);    \
                exit(code);                     \
        } while (0)
44
/* Global state shared between option parsing in main() and the helpers. */
int done;                               /* reset to 0 by main() before each aggregate attribute is walked */
int rcvbufsz;                           /* -r: requested SO_RCVBUF size; 0 keeps the socket default */
char name[100];                         /* holds the genetlink family name sent by get_family_id() */
int dbg;                                /* -v: enables PRINTF() output */
int print_delays;                       /* -d */
int print_io_accounting;                /* -i */
int print_task_context_switch_counts;   /* -q */
__u64 stime, utime;                     /* not referenced by the code visible in this file */

/*
 * Debug printf: emits output only when dbg is non-zero.
 * Wrapped in do { } while (0) so that "PRINTF(...);" is a single statement
 * and can safely be used as the body of an unbraced if/else.
 */
#define PRINTF(fmt, arg...)                     \
        do {                                    \
                if (dbg)                        \
                        printf(fmt, ##arg);     \
        } while (0)

/* Maximum size of response requested or message sent */
#define MAX_MSG_SIZE    1024
/* Maximum number of cpus expected to be specified in a cpumask */
#define MAX_CPUS        32

/* Buffer layout for a generic netlink message: headers followed by payload. */
struct msgtemplate {
        struct nlmsghdr n;
        struct genlmsghdr g;
        char buf[MAX_MSG_SIZE];
};

/* -m: cpumask string sent with the (de)register cpumask commands */
char cpumask[100+6*MAX_CPUS];
72
/*
 * Print a short command-line synopsis to stderr.
 * Lists -q as well, since main() accepts it in its getopt() string.
 */
static void usage(void)
{
        fprintf(stderr, "getdelays [-dilqv] [-w logfile] [-r bufsize] "
                        "[-m cpumask] [-t tgid] [-p pid]\n");
        fprintf(stderr, "  -d: print delayacct stats\n");
        fprintf(stderr, "  -i: print IO accounting (works only with -p)\n");
        fprintf(stderr, "  -l: listen forever\n");
        fprintf(stderr, "  -q: print task/process context switch counts\n");
        fprintf(stderr, "  -v: debug on\n");
}
82
83 /*
84  * Create a raw netlink socket and bind
85  */
86 static int create_nl_socket(int protocol)
87 {
88         int fd;
89         struct sockaddr_nl local;
90
91         fd = socket(AF_NETLINK, SOCK_RAW, protocol);
92         if (fd < 0)
93                 return -1;
94
95         if (rcvbufsz)
96                 if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF,
97                                 &rcvbufsz, sizeof(rcvbufsz)) < 0) {
98                         fprintf(stderr, "Unable to set socket rcv buf size "
99                                         "to %d\n",
100                                 rcvbufsz);
101                         return -1;
102                 }
103
104         memset(&local, 0, sizeof(local));
105         local.nl_family = AF_NETLINK;
106
107         if (bind(fd, (struct sockaddr *) &local, sizeof(local)) < 0)
108                 goto error;
109
110         return fd;
111 error:
112         close(fd);
113         return -1;
114 }
115
116
117 int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
118              __u8 genl_cmd, __u16 nla_type,
119              void *nla_data, int nla_len)
120 {
121         struct nlattr *na;
122         struct sockaddr_nl nladdr;
123         int r, buflen;
124         char *buf;
125
126         struct msgtemplate msg;
127
128         msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
129         msg.n.nlmsg_type = nlmsg_type;
130         msg.n.nlmsg_flags = NLM_F_REQUEST;
131         msg.n.nlmsg_seq = 0;
132         msg.n.nlmsg_pid = nlmsg_pid;
133         msg.g.cmd = genl_cmd;
134         msg.g.version = 0x1;
135         na = (struct nlattr *) GENLMSG_DATA(&msg);
136         na->nla_type = nla_type;
137         na->nla_len = nla_len + 1 + NLA_HDRLEN;
138         memcpy(NLA_DATA(na), nla_data, nla_len);
139         msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);
140
141         buf = (char *) &msg;
142         buflen = msg.n.nlmsg_len ;
143         memset(&nladdr, 0, sizeof(nladdr));
144         nladdr.nl_family = AF_NETLINK;
145         while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr,
146                            sizeof(nladdr))) < buflen) {
147                 if (r > 0) {
148                         buf += r;
149                         buflen -= r;
150                 } else if (errno != EAGAIN)
151                         return -1;
152         }
153         return 0;
154 }
155
156
157 /*
158  * Probe the controller in genetlink to find the family id
159  * for the TASKSTATS family
160  */
161 int get_family_id(int sd)
162 {
163         struct {
164                 struct nlmsghdr n;
165                 struct genlmsghdr g;
166                 char buf[256];
167         } ans;
168
169         int id, rc;
170         struct nlattr *na;
171         int rep_len;
172
173         strcpy(name, TASKSTATS_GENL_NAME);
174         rc = send_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY,
175                         CTRL_ATTR_FAMILY_NAME, (void *)name,
176                         strlen(TASKSTATS_GENL_NAME)+1);
177
178         rep_len = recv(sd, &ans, sizeof(ans), 0);
179         if (ans.n.nlmsg_type == NLMSG_ERROR ||
180             (rep_len < 0) || !NLMSG_OK((&ans.n), rep_len))
181                 return 0;
182
183         na = (struct nlattr *) GENLMSG_DATA(&ans);
184         na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len));
185         if (na->nla_type == CTRL_ATTR_FAMILY_ID) {
186                 id = *(__u16 *) NLA_DATA(na);
187         }
188         return id;
189 }
190
/*
 * Print the CPU, block I/O and swap-in counters of t to stdout as a table.
 *
 * The values are cast to unsigned long long so that the arguments match
 * the %llu conversions regardless of how __u64 is defined on the target.
 */
void print_delayacct(struct taskstats *t)
{
        printf("\n\nCPU   %15s%15s%15s%15s\n"
               "      %15llu%15llu%15llu%15llu\n"
               "IO    %15s%15s\n"
               "      %15llu%15llu\n"
               "MEM   %15s%15s\n"
               "      %15llu%15llu\n",
               "count", "real total", "virtual total", "delay total",
               (unsigned long long)t->cpu_count,
               (unsigned long long)t->cpu_run_real_total,
               (unsigned long long)t->cpu_run_virtual_total,
               (unsigned long long)t->cpu_delay_total,
               "count", "delay total",
               (unsigned long long)t->blkio_count,
               (unsigned long long)t->blkio_delay_total,
               "count", "delay total",
               (unsigned long long)t->swapin_count,
               (unsigned long long)t->swapin_delay_total);
}
206
/*
 * Print the voluntary and non-voluntary context switch counters of t
 * to stdout.
 *
 * The values are printed with %llu through an explicit cast; the previous
 * %lu conversion did not match a 64-bit argument on targets where long is
 * 32 bits wide.
 */
void task_context_switch_counts(struct taskstats *t)
{
        printf("\n\nTask   %15s%15s\n"
               "       %15llu%15llu\n",
               "voluntary", "nonvoluntary",
               (unsigned long long)t->nvcsw,
               (unsigned long long)t->nivcsw);
}
214
/*
 * Print one line with the command name of t followed by its read, write
 * and cancelled-write byte counters.
 */
void print_ioacct(struct taskstats *t)
{
        unsigned long long bytes_read = (unsigned long long)t->read_bytes;
        unsigned long long bytes_written = (unsigned long long)t->write_bytes;
        unsigned long long bytes_cancelled =
                (unsigned long long)t->cancelled_write_bytes;

        printf("%s: read=%llu, write=%llu, cancelled_write=%llu\n",
                t->ac_comm, bytes_read, bytes_written, bytes_cancelled);
}
223
224 int main(int argc, char *argv[])
225 {
226         int c, rc, rep_len, aggr_len, len2, cmd_type;
227         __u16 id;
228         __u32 mypid;
229
230         struct nlattr *na;
231         int nl_sd = -1;
232         int len = 0;
233         pid_t tid = 0;
234         pid_t rtid = 0;
235
236         int fd = 0;
237         int count = 0;
238         int write_file = 0;
239         int maskset = 0;
240         char *logfile = NULL;
241         int loop = 0;
242
243         struct msgtemplate msg;
244
245         while (1) {
246                 c = getopt(argc, argv, "qdiw:r:m:t:p:vl");
247                 if (c < 0)
248                         break;
249
250                 switch (c) {
251                 case 'd':
252                         printf("print delayacct stats ON\n");
253                         print_delays = 1;
254                         break;
255                 case 'i':
256                         printf("printing IO accounting\n");
257                         print_io_accounting = 1;
258                         break;
259                 case 'q':
260                         printf("printing task/process context switch rates\n");
261                         print_task_context_switch_counts = 1;
262                         break;
263                 case 'w':
264                         logfile = strdup(optarg);
265                         printf("write to file %s\n", logfile);
266                         write_file = 1;
267                         break;
268                 case 'r':
269                         rcvbufsz = atoi(optarg);
270                         printf("receive buf size %d\n", rcvbufsz);
271                         if (rcvbufsz < 0)
272                                 err(1, "Invalid rcv buf size\n");
273                         break;
274                 case 'm':
275                         strncpy(cpumask, optarg, sizeof(cpumask));
276                         maskset = 1;
277                         printf("cpumask %s maskset %d\n", cpumask, maskset);
278                         break;
279                 case 't':
280                         tid = atoi(optarg);
281                         if (!tid)
282                                 err(1, "Invalid tgid\n");
283                         cmd_type = TASKSTATS_CMD_ATTR_TGID;
284                         break;
285                 case 'p':
286                         tid = atoi(optarg);
287                         if (!tid)
288                                 err(1, "Invalid pid\n");
289                         cmd_type = TASKSTATS_CMD_ATTR_PID;
290                         break;
291                 case 'v':
292                         printf("debug on\n");
293                         dbg = 1;
294                         break;
295                 case 'l':
296                         printf("listen forever\n");
297                         loop = 1;
298                         break;
299                 default:
300                         usage();
301                         exit(-1);
302                 }
303         }
304
305         if (write_file) {
306                 fd = open(logfile, O_WRONLY | O_CREAT | O_TRUNC,
307                           S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
308                 if (fd == -1) {
309                         perror("Cannot open output file\n");
310                         exit(1);
311                 }
312         }
313
314         if ((nl_sd = create_nl_socket(NETLINK_GENERIC)) < 0)
315                 err(1, "error creating Netlink socket\n");
316
317
318         mypid = getpid();
319         id = get_family_id(nl_sd);
320         if (!id) {
321                 fprintf(stderr, "Error getting family id, errno %d\n", errno);
322                 goto err;
323         }
324         PRINTF("family id %d\n", id);
325
326         if (maskset) {
327                 rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
328                               TASKSTATS_CMD_ATTR_REGISTER_CPUMASK,
329                               &cpumask, strlen(cpumask) + 1);
330                 PRINTF("Sent register cpumask, retval %d\n", rc);
331                 if (rc < 0) {
332                         fprintf(stderr, "error sending register cpumask\n");
333                         goto err;
334                 }
335         }
336
337         if (tid) {
338                 rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
339                               cmd_type, &tid, sizeof(__u32));
340                 PRINTF("Sent pid/tgid, retval %d\n", rc);
341                 if (rc < 0) {
342                         fprintf(stderr, "error sending tid/tgid cmd\n");
343                         goto done;
344                 }
345         }
346
347         do {
348                 int i;
349
350                 rep_len = recv(nl_sd, &msg, sizeof(msg), 0);
351                 PRINTF("received %d bytes\n", rep_len);
352
353                 if (rep_len < 0) {
354                         fprintf(stderr, "nonfatal reply error: errno %d\n",
355                                 errno);
356                         continue;
357                 }
358                 if (msg.n.nlmsg_type == NLMSG_ERROR ||
359                     !NLMSG_OK((&msg.n), rep_len)) {
360                         struct nlmsgerr *err = NLMSG_DATA(&msg);
361                         fprintf(stderr, "fatal reply error,  errno %d\n",
362                                 err->error);
363                         goto done;
364                 }
365
366                 PRINTF("nlmsghdr size=%d, nlmsg_len=%d, rep_len=%d\n",
367                        sizeof(struct nlmsghdr), msg.n.nlmsg_len, rep_len);
368
369
370                 rep_len = GENLMSG_PAYLOAD(&msg.n);
371
372                 na = (struct nlattr *) GENLMSG_DATA(&msg);
373                 len = 0;
374                 i = 0;
375                 while (len < rep_len) {
376                         len += NLA_ALIGN(na->nla_len);
377                         switch (na->nla_type) {
378                         case TASKSTATS_TYPE_AGGR_TGID:
379                                 /* Fall through */
380                         case TASKSTATS_TYPE_AGGR_PID:
381                                 aggr_len = NLA_PAYLOAD(na->nla_len);
382                                 len2 = 0;
383                                 /* For nested attributes, na follows */
384                                 na = (struct nlattr *) NLA_DATA(na);
385                                 done = 0;
386                                 while (len2 < aggr_len) {
387                                         switch (na->nla_type) {
388                                         case TASKSTATS_TYPE_PID:
389                                                 rtid = *(int *) NLA_DATA(na);
390                                                 if (print_delays)
391                                                         printf("PID\t%d\n", rtid);
392                                                 break;
393                                         case TASKSTATS_TYPE_TGID:
394                                                 rtid = *(int *) NLA_DATA(na);
395                                                 if (print_delays)
396                                                         printf("TGID\t%d\n", rtid);
397                                                 break;
398                                         case TASKSTATS_TYPE_STATS:
399                                                 count++;
400                                                 if (print_delays)
401                                                         print_delayacct((struct taskstats *) NLA_DATA(na));
402                                                 if (print_io_accounting)
403                                                         print_ioacct((struct taskstats *) NLA_DATA(na));
404                                                 if (print_task_context_switch_counts)
405                                                         task_context_switch_counts((struct taskstats *) NLA_DATA(na));
406                                                 if (fd) {
407                                                         if (write(fd, NLA_DATA(na), na->nla_len) < 0) {
408                                                                 err(1,"write error\n");
409                                                         }
410                                                 }
411                                                 if (!loop)
412                                                         goto done;
413                                                 break;
414                                         default:
415                                                 fprintf(stderr, "Unknown nested"
416                                                         " nla_type %d\n",
417                                                         na->nla_type);
418                                                 break;
419                                         }
420                                         len2 += NLA_ALIGN(na->nla_len);
421                                         na = (struct nlattr *) ((char *) na + len2);
422                                 }
423                                 break;
424
425                         default:
426                                 fprintf(stderr, "Unknown nla_type %d\n",
427                                         na->nla_type);
428                                 break;
429                         }
430                         na = (struct nlattr *) (GENLMSG_DATA(&msg) + len);
431                 }
432         } while (loop);
433 done:
434         if (maskset) {
435                 rc = send_cmd(nl_sd, id, mypid, TASKSTATS_CMD_GET,
436                               TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK,
437                               &cpumask, strlen(cpumask) + 1);
438                 printf("Sent deregister mask, retval %d\n", rc);
439                 if (rc < 0)
440                         err(rc, "error sending deregister cpumask\n");
441         }
442 err:
443         close(nl_sd);
444         if (fd)
445                 close(fd);
446         return 0;
447 }