netfilter: xt_qtaguid: 1st pass at tracking tag based data resources
[linux-2.6.git] / net / netfilter / xt_qtaguid.c
1 /*
2  * Kernel iptables module to track stats for packets based on user tags.
3  *
4  * (C) 2011 Google, Inc
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10
11 /*
12  * There are run-time debug flags enabled via the debug_mask module param, or
13  * via the DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h.
14  */
15 #define DEBUG
16
17 #include <linux/file.h>
18 #include <linux/inetdevice.h>
19 #include <linux/module.h>
20 #include <linux/netfilter/x_tables.h>
21 #include <linux/netfilter/xt_qtaguid.h>
22 #include <linux/skbuff.h>
23 #include <linux/workqueue.h>
24 #include <net/addrconf.h>
25 #include <net/sock.h>
26 #include <net/tcp.h>
27 #include <net/udp.h>
28
29 #include <linux/netfilter/xt_socket.h>
30 #include "xt_qtaguid_internal.h"
31 #include "xt_qtaguid_print.h"
32
33 /*
34  * We only use the xt_socket funcs within a similar context to avoid unexpected
35  * return values.
36  */
37 #define XT_SOCKET_SUPPORTED_HOOKS \
38         ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN))
39
40
41 static const char *module_procdirname = "xt_qtaguid";
42 static struct proc_dir_entry *xt_qtaguid_procdir;
43
44 static unsigned int proc_iface_perms = S_IRUGO;
45 module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR);
46
47 static struct proc_dir_entry *xt_qtaguid_stats_file;
48 static unsigned int proc_stats_perms = S_IRUGO;
49 module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR);
50
51 static struct proc_dir_entry *xt_qtaguid_ctrl_file;
52 #ifdef CONFIG_ANDROID_PARANOID_NETWORK
53 static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO;
54 #else
55 static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUSR;
56 #endif
57 module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR);
58
59 #ifdef CONFIG_ANDROID_PARANOID_NETWORK
60 #include <linux/android_aid.h>
61 static gid_t proc_stats_readall_gid = AID_NET_BW_STATS;
62 static gid_t proc_ctrl_write_gid = AID_NET_BW_ACCT;
63 #else
64 /* 0 means, don't limit anybody */
65 static gid_t proc_stats_readall_gid;
66 static gid_t proc_ctrl_write_gid;
67 #endif
68 module_param_named(stats_readall_gid, proc_stats_readall_gid, uint,
69                    S_IRUGO | S_IWUSR);
70 module_param_named(ctrl_write_gid, proc_ctrl_write_gid, uint,
71                    S_IRUGO | S_IWUSR);
72
73 /*
74  * Limit the number of active tags (via socket tags) for a given UID.
75  * Multiple processes could share the UID.
76  */
77 static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS;
78 module_param(max_sock_tags, int, S_IRUGO | S_IWUSR);
79
80 /*
81  * After the kernel has initialized this module, it is still possible
82  * to make it passive.
83  * Setting passive to Y:
84  *  - the iface stats handling will not act on notifications.
85  *  - iptables matches will never match.
86  *  - ctrl commands silently succeed.
87  *  - stats are always empty.
88  * This is mostly useful when a bug is suspected.
89  */
90 static bool module_passive;
91 module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR);
92
93 /*
94  * Control how qtaguid data is tracked per proc/uid.
95  * Setting tag_tracking_passive to Y:
96  *  - don't create proc specific structs to track tags
97  *  - don't check that active tag stats exceed some limits.
98  *  - don't clean up socket tags on process exits.
99  * This is mostly useful when a bug is suspected.
100  */
101 static bool qtu_proc_handling_passive;
102 module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool,
103                    S_IRUGO | S_IWUSR);
104
105
106 #define QTU_DEV_NAME "xt_qtaguid"
107
108 uint debug_mask = DEFAULT_DEBUG_MASK;
109 module_param(debug_mask, uint, S_IRUGO | S_IWUSR);
110
111 /*---------------------------------------------------------------------------*/
112 static const char *iface_stat_procdirname = "iface_stat";
113 static struct proc_dir_entry *iface_stat_procdir;
114
115 /*
116  * Ordering of locks:
117  *  outer locks:
118  *    iface_stat_list_lock
119  *    sock_tag_list_lock
120  *  inner locks:
121  *    uid_tag_data_tree_lock
122  *    tag_counter_set_list_lock
123  * Notice how sock_tag_list_lock is held sometimes when uid_tag_data_tree_lock
124  * is acquired.
125  *
126  * Call tree with all lock holders as of 2011-09-06:
127  *
128  *   qtaguid_ctrl_parse()
129  *     ctrl_cmd_delete()
130  *       sock_tag_list_lock
131  *       tag_counter_set_list_lock
132  *       iface_stat_list_lock
133  *         iface_entry->tag_stat_list_lock
134  *       uid_tag_data_tree_lock
135  *     ctrl_cmd_counter_set()
136  *       tag_counter_set_list_lock
137  *     ctrl_cmd_tag()
138  *       sock_tag_list_lock
139  *         get_tag_ref()
140  *           uid_tag_data_tree_lock
141  *       uid_tag_data_tree_lock
142  *     ctrl_cmd_untag()
143  *       sock_tag_list_lock
144  *         uid_tag_data_tree_lock
145  *
146  *   qtaguid_mt()
147  *     account_for_uid()
148  *       if_tag_stat_update()
149  *     get_sock_stat()
150  *       sock_tag_list_lock
151  *        iface_entry->tag_stat_list_lock
152  *        tag_stat_update()
153  *          get_active_counter_set()
154  *            tag_counter_set_list_lock
155  *
156  *   iface_netdev_event_handler()
157  *     iface_stat_create()
158  *       iface_stat_list_lock
159  *     iface_stat_update()
160  *       iface_stat_list_lock
161  *
162  *   iface_inet6addr_event_handler()
163  *     iface_stat_create_ipv6()
164  *       iface_stat_list_lock
165  *     iface_stat_update()
166  *       iface_stat_list_lock
167  *
168  *   iface_inetaddr_event_handler()
169  *     iface_stat_create()
170  *       iface_stat_list_lock
171  *     iface_stat_update()
172  *       iface_stat_list_lock
173  *
174  *   qtaguid_ctrl_proc_read()
175  *     sock_tag_list_lock
176  *     sock_tag_list_lock
177  *     uid_tag_data_tree_lock
178  *     iface_stat_list_lock
179  *
180  *   qtaguid_stats_proc_read()
181  *     iface_stat_list_lock
182  *       iface_entry->tag_stat_list_lock
183  *
184  *   qtudev_open()
185  *     uid_tag_data_tree_lock
186  *
187  *   qtud_dev_release()
188  *     sock_tag_list_lock
189  *       uid_tag_data_tree_lock
190  */
191 static LIST_HEAD(iface_stat_list);
192 static DEFINE_SPINLOCK(iface_stat_list_lock);
193
194 static struct rb_root sock_tag_tree = RB_ROOT;
195 static DEFINE_SPINLOCK(sock_tag_list_lock);
196
197 static struct rb_root tag_counter_set_tree = RB_ROOT;
198 static DEFINE_SPINLOCK(tag_counter_set_list_lock);
199
200 static struct rb_root uid_tag_data_tree = RB_ROOT;
201 static DEFINE_SPINLOCK(uid_tag_data_tree_lock);
202
203 static struct rb_root proc_qtu_data_tree = RB_ROOT;
204 /* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */
205
206 static struct qtaguid_event_counts qtu_events;
207 /*----------------------------------------------*/
208 static bool can_manipulate_uids(void)
209 {
210         /* root pwnd */
211         return unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_gid)
212                 || in_egroup_p(proc_ctrl_write_gid);
213 }
214
215 static bool can_impersonate_uid(uid_t uid)
216 {
217         return uid == current_fsuid() || can_manipulate_uids();
218 }
219
220 static bool can_read_other_uid_stats(uid_t uid)
221 {
222         /* root pwnd */
223         return unlikely(!current_fsuid()) || uid == current_fsuid()
224                 || unlikely(!proc_stats_readall_gid)
225                 || in_egroup_p(proc_stats_readall_gid);
226 }
227
228 static inline void dc_add_byte_packets(struct data_counters *counters, int set,
229                                   enum ifs_tx_rx direction,
230                                   enum ifs_proto ifs_proto,
231                                   int bytes,
232                                   int packets)
233 {
234         counters->bpc[set][direction][ifs_proto].bytes += bytes;
235         counters->bpc[set][direction][ifs_proto].packets += packets;
236 }
237
238 static inline uint64_t dc_sum_bytes(struct data_counters *counters,
239                                     int set,
240                                     enum ifs_tx_rx direction)
241 {
242         return counters->bpc[set][direction][IFS_TCP].bytes
243                 + counters->bpc[set][direction][IFS_UDP].bytes
244                 + counters->bpc[set][direction][IFS_PROTO_OTHER].bytes;
245 }
246
247 static inline uint64_t dc_sum_packets(struct data_counters *counters,
248                                       int set,
249                                       enum ifs_tx_rx direction)
250 {
251         return counters->bpc[set][direction][IFS_TCP].packets
252                 + counters->bpc[set][direction][IFS_UDP].packets
253                 + counters->bpc[set][direction][IFS_PROTO_OTHER].packets;
254 }
255
256 static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag)
257 {
258         struct rb_node *node = root->rb_node;
259
260         while (node) {
261                 struct tag_node *data = rb_entry(node, struct tag_node, node);
262                 int result;
263                 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
264                          " node=%p data=%p\n", tag, node, data);
265                 result = tag_compare(tag, data->tag);
266                 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
267                          " data.tag=0x%llx (uid=%u) res=%d\n",
268                          tag, data->tag, get_uid_from_tag(data->tag), result);
269                 if (result < 0)
270                         node = node->rb_left;
271                 else if (result > 0)
272                         node = node->rb_right;
273                 else
274                         return data;
275         }
276         return NULL;
277 }
278
279 static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root)
280 {
281         struct rb_node **new = &(root->rb_node), *parent = NULL;
282
283         /* Figure out where to put new node */
284         while (*new) {
285                 struct tag_node *this = rb_entry(*new, struct tag_node,
286                                                  node);
287                 int result = tag_compare(data->tag, this->tag);
288                 RB_DEBUG("qtaguid: %s(): tag=0x%llx"
289                          " (uid=%u)\n", __func__,
290                          this->tag,
291                          get_uid_from_tag(this->tag));
292                 parent = *new;
293                 if (result < 0)
294                         new = &((*new)->rb_left);
295                 else if (result > 0)
296                         new = &((*new)->rb_right);
297                 else
298                         BUG();
299         }
300
301         /* Add new node and rebalance tree. */
302         rb_link_node(&data->node, parent, new);
303         rb_insert_color(&data->node, root);
304 }
305
306 static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root)
307 {
308         tag_node_tree_insert(&data->tn, root);
309 }
310
311 static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag)
312 {
313         struct tag_node *node = tag_node_tree_search(root, tag);
314         if (!node)
315                 return NULL;
316         return rb_entry(&node->node, struct tag_stat, tn.node);
317 }
318
319 static void tag_counter_set_tree_insert(struct tag_counter_set *data,
320                                         struct rb_root *root)
321 {
322         tag_node_tree_insert(&data->tn, root);
323 }
324
325 static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root,
326                                                            tag_t tag)
327 {
328         struct tag_node *node = tag_node_tree_search(root, tag);
329         if (!node)
330                 return NULL;
331         return rb_entry(&node->node, struct tag_counter_set, tn.node);
332
333 }
334
335 static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root)
336 {
337         tag_node_tree_insert(&data->tn, root);
338 }
339
340 static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag)
341 {
342         struct tag_node *node = tag_node_tree_search(root, tag);
343         if (!node)
344                 return NULL;
345         return rb_entry(&node->node, struct tag_ref, tn.node);
346 }
347
348 static struct sock_tag *sock_tag_tree_search(struct rb_root *root,
349                                              const struct sock *sk)
350 {
351         struct rb_node *node = root->rb_node;
352
353         while (node) {
354                 struct sock_tag *data = rb_entry(node, struct sock_tag,
355                                                  sock_node);
356                 if (sk < data->sk)
357                         node = node->rb_left;
358                 else if (sk > data->sk)
359                         node = node->rb_right;
360                 else
361                         return data;
362         }
363         return NULL;
364 }
365
366 static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root)
367 {
368         struct rb_node **new = &(root->rb_node), *parent = NULL;
369
370         /* Figure out where to put new node */
371         while (*new) {
372                 struct sock_tag *this = rb_entry(*new, struct sock_tag,
373                                                  sock_node);
374                 parent = *new;
375                 if (data->sk < this->sk)
376                         new = &((*new)->rb_left);
377                 else if (data->sk > this->sk)
378                         new = &((*new)->rb_right);
379                 else
380                         BUG();
381         }
382
383         /* Add new node and rebalance tree. */
384         rb_link_node(&data->sock_node, parent, new);
385         rb_insert_color(&data->sock_node, root);
386 }
387
388 static void sock_tag_tree_erase(struct rb_root *st_to_free_tree)
389 {
390         struct rb_node *node;
391         struct sock_tag *st_entry;
392
393         node = rb_first(st_to_free_tree);
394         while (node) {
395                 st_entry = rb_entry(node, struct sock_tag, sock_node);
396                 node = rb_next(node);
397                 CT_DEBUG("qtaguid: %s(): "
398                          "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__,
399                          st_entry->sk,
400                          st_entry->tag,
401                          get_uid_from_tag(st_entry->tag));
402                 rb_erase(&st_entry->sock_node, st_to_free_tree);
403                 sockfd_put(st_entry->socket);
404                 kfree(st_entry);
405         }
406 }
407
408 static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root,
409                                                        const pid_t pid)
410 {
411         struct rb_node *node = root->rb_node;
412
413         while (node) {
414                 struct proc_qtu_data *data = rb_entry(node,
415                                                       struct proc_qtu_data,
416                                                       node);
417                 if (pid < data->pid)
418                         node = node->rb_left;
419                 else if (pid > data->pid)
420                         node = node->rb_right;
421                 else
422                         return data;
423         }
424         return NULL;
425 }
426
427 static void proc_qtu_data_tree_insert(struct proc_qtu_data *data,
428                                       struct rb_root *root)
429 {
430         struct rb_node **new = &(root->rb_node), *parent = NULL;
431
432         /* Figure out where to put new node */
433         while (*new) {
434                 struct proc_qtu_data *this = rb_entry(*new,
435                                                       struct proc_qtu_data,
436                                                       node);
437                 parent = *new;
438                 if (data->pid < this->pid)
439                         new = &((*new)->rb_left);
440                 else if (data->pid > this->pid)
441                         new = &((*new)->rb_right);
442                 else
443                         BUG();
444         }
445
446         /* Add new node and rebalance tree. */
447         rb_link_node(&data->node, parent, new);
448         rb_insert_color(&data->node, root);
449 }
450
451 static void uid_tag_data_tree_insert(struct uid_tag_data *data,
452                                      struct rb_root *root)
453 {
454         struct rb_node **new = &(root->rb_node), *parent = NULL;
455
456         /* Figure out where to put new node */
457         while (*new) {
458                 struct uid_tag_data *this = rb_entry(*new,
459                                                      struct uid_tag_data,
460                                                      node);
461                 parent = *new;
462                 if (data->uid < this->uid)
463                         new = &((*new)->rb_left);
464                 else if (data->uid > this->uid)
465                         new = &((*new)->rb_right);
466                 else
467                         BUG();
468         }
469
470         /* Add new node and rebalance tree. */
471         rb_link_node(&data->node, parent, new);
472         rb_insert_color(&data->node, root);
473 }
474
475 static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root,
476                                                      uid_t uid)
477 {
478         struct rb_node *node = root->rb_node;
479
480         while (node) {
481                 struct uid_tag_data *data = rb_entry(node,
482                                                      struct uid_tag_data,
483                                                      node);
484                 if (uid < data->uid)
485                         node = node->rb_left;
486                 else if (uid > data->uid)
487                         node = node->rb_right;
488                 else
489                         return data;
490         }
491         return NULL;
492 }
493
494 /*
495  * Allocates a new uid_tag_data struct if needed.
496  * Returns a pointer to the found or allocated uid_tag_data.
497  * Returns a PTR_ERR on failures, and lock is not held.
498  * If found is not NULL:
499  *   sets *found to true if not allocated.
500  *   sets *found to false if allocated.
501  */
502 struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res)
503 {
504         struct uid_tag_data *utd_entry;
505
506         /* Look for top level uid_tag_data for the UID */
507         utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid);
508         DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry);
509
510         if (found_res)
511                 *found_res = utd_entry;
512         if (utd_entry)
513                 return utd_entry;
514
515         utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC);
516         if (!utd_entry) {
517                 pr_err("qtaguid: get_uid_data(%u): "
518                        "tag data alloc failed\n", uid);
519                 return ERR_PTR(-ENOMEM);
520         }
521
522         utd_entry->uid = uid;
523         utd_entry->tag_ref_tree = RB_ROOT;
524         uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree);
525         DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry);
526         return utd_entry;
527 }
528
529 /* Never returns NULL. Either PTR_ERR or a valid ptr. */
530 static struct tag_ref *new_tag_ref(tag_t new_tag,
531                                    struct uid_tag_data *utd_entry)
532 {
533         struct tag_ref *tr_entry;
534         int res;
535
536         if (utd_entry->num_active_tags + 1 > max_sock_tags) {
537                 pr_info("qtaguid: new_tag_ref(0x%llx): "
538                         "tag ref alloc quota exceeded. max=%d\n",
539                         new_tag, max_sock_tags);
540                 res = -EMFILE;
541                 goto err_res;
542
543         }
544
545         tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC);
546         if (!tr_entry) {
547                 pr_err("qtaguid: new_tag_ref(0x%llx): "
548                        "tag ref alloc failed\n",
549                        new_tag);
550                 res = -ENOMEM;
551                 goto err_res;
552         }
553         tr_entry->tn.tag = new_tag;
554         /* tr_entry->num_sock_tags  handled by caller */
555         utd_entry->num_active_tags++;
556         tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree);
557         DR_DEBUG("qtaguid: new_tag_ref(0x%llx): "
558                  " inserted new tag ref\n",
559                  new_tag);
560         return tr_entry;
561
562 err_res:
563         return ERR_PTR(res);
564 }
565
566 static struct tag_ref *lookup_tag_ref(tag_t full_tag,
567                                       struct uid_tag_data **utd_res)
568 {
569         struct uid_tag_data *utd_entry;
570         struct tag_ref *tr_entry;
571         bool found_utd;
572         uid_t uid = get_uid_from_tag(full_tag);
573
574         DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n",
575                  full_tag, uid);
576
577         utd_entry = get_uid_data(uid, &found_utd);
578         if (IS_ERR_OR_NULL(utd_entry)) {
579                 if (utd_res)
580                         *utd_res = utd_entry;
581                 return NULL;
582         }
583
584         tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag);
585         if (utd_res)
586                 *utd_res = utd_entry;
587         DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n",
588                  full_tag, utd_entry, tr_entry);
589         return tr_entry;
590 }
591
592 /* Never returns NULL. Either PTR_ERR or a valid ptr. */
593 static struct tag_ref *get_tag_ref(tag_t full_tag,
594                                    struct uid_tag_data **utd_res)
595 {
596         struct uid_tag_data *utd_entry;
597         struct tag_ref *tr_entry;
598
599         DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n",
600                  full_tag);
601         spin_lock_bh(&uid_tag_data_tree_lock);
602         tr_entry = lookup_tag_ref(full_tag, &utd_entry);
603         BUG_ON(IS_ERR_OR_NULL(utd_entry));
604         if (!tr_entry)
605                 tr_entry = new_tag_ref(full_tag, utd_entry);
606
607         spin_unlock_bh(&uid_tag_data_tree_lock);
608         if (utd_res)
609                 *utd_res = utd_entry;
610         DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n",
611                  full_tag, utd_entry, tr_entry);
612         return tr_entry;
613 }
614
615 /* Checks and maybe frees the UID Tag Data entry */
616 static void put_utd_entry(struct uid_tag_data *utd_entry)
617 {
618         /* Are we done with the UID tag data entry? */
619         if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree)) {
620                 DR_DEBUG("qtaguid: %s(): "
621                          "erase utd_entry=%p uid=%u "
622                          "by pid=%u tgid=%u uid=%u\n", __func__,
623                          utd_entry, utd_entry->uid,
624                          current->pid, current->tgid, current_fsuid());
625                 BUG_ON(utd_entry->num_active_tags);
626                 rb_erase(&utd_entry->node, &uid_tag_data_tree);
627                 kfree(utd_entry);
628         } else {
629                 DR_DEBUG("qtaguid: %s(): "
630                          "utd_entry=%p still has %d tags\n", __func__,
631                          utd_entry, utd_entry->num_active_tags);
632                 BUG_ON(!utd_entry->num_active_tags);
633         }
634 }
635
636 /*
637  * If no sock_tags are using this tag_ref,
638  * decrements refcount of utd_entry, removes tr_entry
639  * from utd_entry->tag_ref_tree and frees.
640  */
641 static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry,
642                                         struct uid_tag_data *utd_entry)
643 {
644         DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__,
645                  tr_entry, tr_entry->tn.tag,
646                  get_uid_from_tag(tr_entry->tn.tag));
647         if (!tr_entry->num_sock_tags) {
648                 BUG_ON(!utd_entry->num_active_tags);
649                 utd_entry->num_active_tags--;
650                 rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree);
651                 DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry);
652                 kfree(tr_entry);
653         }
654 }
655
656 static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry)
657 {
658         struct rb_node *node;
659         struct tag_ref *tr_entry;
660         tag_t acct_tag;
661
662         DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__,
663                  full_tag, get_uid_from_tag(full_tag));
664         acct_tag = get_atag_from_tag(full_tag);
665         node = rb_first(&utd_entry->tag_ref_tree);
666         while (node) {
667                 tr_entry = rb_entry(node, struct tag_ref, tn.node);
668                 node = rb_next(node);
669                 if (!acct_tag || tr_entry->tn.tag == full_tag)
670                         free_tag_ref_from_utd_entry(tr_entry, utd_entry);
671         }
672 }
673
/*
 * read_proc handler: formats the uint64_t pointed to by @data as
 * "<decimal>\n" into @page.
 *
 * @page:  output buffer supplied by procfs.
 * @start: set to @page + @off.
 * @off:   offset into the formatted text the reader wants.
 * @count: maximum number of bytes the reader accepts.
 * @eof:   set to 1 when everything from @off onward fits in @count.
 * @data:  pointer to the uint64_t to print; nothing is done when NULL.
 *
 * Returns the number of bytes available at *start, clamped to the range
 * [0, @count]. Without the clamp an @off past the end of the text produced
 * a negative return value, and a small @count produced one larger than the
 * reader asked for.
 */
static int read_proc_u64(char *page, char **start, off_t off,
			int count, int *eof, void *data)
{
	const uint64_t *value = data;
	int len;

	if (!data)
		return 0;

	len = sprintf(page, "%llu\n", (unsigned long long)*value);
	len -= off;
	*eof = (len <= count) ? 1 : 0;
	*start = page + off;

	if (len > count)
		len = count;
	if (len < 0)
		len = 0;
	return len;
}
692
/*
 * read_proc handler: formats the bool pointed to by @data as "0\n" or
 * "1\n" into @page.
 *
 * @page:  output buffer supplied by procfs.
 * @start: set to @page + @off.
 * @off:   offset into the formatted text the reader wants.
 * @count: maximum number of bytes the reader accepts.
 * @eof:   set to 1 when everything from @off onward fits in @count.
 * @data:  pointer to the bool to print; nothing is done when NULL.
 *
 * Returns the number of bytes available at *start, clamped to the range
 * [0, @count]. Without the clamp an @off past the end of the text produced
 * a negative return value, and a small @count produced one larger than the
 * reader asked for.
 */
static int read_proc_bool(char *page, char **start, off_t off,
			int count, int *eof, void *data)
{
	const bool *value = data;
	int len;

	if (!data)
		return 0;

	len = sprintf(page, "%u\n", (unsigned int)*value);
	len -= off;
	*eof = (len <= count) ? 1 : 0;
	*start = page + off;

	if (len > count)
		len = count;
	if (len < 0)
		len = 0;
	return len;
}
711
712 static int get_active_counter_set(tag_t tag)
713 {
714         int active_set = 0;
715         struct tag_counter_set *tcs;
716
717         MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)"
718                  " (uid=%u)\n",
719                  tag, get_uid_from_tag(tag));
720         /* For now we only handle UID tags for active sets */
721         tag = get_utag_from_tag(tag);
722         spin_lock_bh(&tag_counter_set_list_lock);
723         tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
724         if (tcs)
725                 active_set = tcs->active_set;
726         spin_unlock_bh(&tag_counter_set_list_lock);
727         return active_set;
728 }
729
730 /*
731  * Find the entry for tracking the specified interface.
732  * Caller must hold iface_stat_list_lock
733  */
734 static struct iface_stat *get_iface_entry(const char *ifname)
735 {
736         struct iface_stat *iface_entry;
737
738         /* Find the entry for tracking the specified tag within the interface */
739         if (ifname == NULL) {
740                 pr_info("qtaguid: iface_stat: get() NULL device name\n");
741                 return NULL;
742         }
743
744         /* Iterate over interfaces */
745         list_for_each_entry(iface_entry, &iface_stat_list, list) {
746                 if (!strcmp(ifname, iface_entry->ifname))
747                         goto done;
748         }
749         iface_entry = NULL;
750 done:
751         return iface_entry;
752 }
753
754 static void iface_create_proc_worker(struct work_struct *work)
755 {
756         struct proc_dir_entry *proc_entry;
757         struct iface_stat_work *isw = container_of(work, struct iface_stat_work,
758                                                    iface_work);
759         struct iface_stat *new_iface  = isw->iface_entry;
760
761         /* iface_entries are not deleted, so safe to manipulate. */
762         proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir);
763         if (IS_ERR_OR_NULL(proc_entry)) {
764                 pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n");
765                 kfree(isw);
766                 return;
767         }
768
769         new_iface->proc_ptr = proc_entry;
770
771         create_proc_read_entry("tx_bytes", proc_iface_perms, proc_entry,
772                         read_proc_u64, &new_iface->tx_bytes);
773         create_proc_read_entry("rx_bytes", proc_iface_perms, proc_entry,
774                         read_proc_u64, &new_iface->rx_bytes);
775         create_proc_read_entry("tx_packets", proc_iface_perms, proc_entry,
776                         read_proc_u64, &new_iface->tx_packets);
777         create_proc_read_entry("rx_packets", proc_iface_perms, proc_entry,
778                         read_proc_u64, &new_iface->rx_packets);
779         create_proc_read_entry("active", proc_iface_perms, proc_entry,
780                         read_proc_bool, &new_iface->active);
781
782         IF_DEBUG("qtaguid: iface_stat: create_proc(): done "
783                  "entry=%p dev=%s\n", new_iface, new_iface->ifname);
784         kfree(isw);
785 }
786
787 /* Caller must hold iface_stat_list_lock */
788 static struct iface_stat *iface_alloc(const char *ifname)
789 {
790         struct iface_stat *new_iface;
791         struct iface_stat_work *isw;
792
793         new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC);
794         if (new_iface == NULL) {
795                 pr_err("qtaguid: iface_stat: create(%s): "
796                        "iface_stat alloc failed\n", ifname);
797                 return NULL;
798         }
799         new_iface->ifname = kstrdup(ifname, GFP_ATOMIC);
800         if (new_iface->ifname == NULL) {
801                 pr_err("qtaguid: iface_stat: create(%s): "
802                        "ifname alloc failed\n", ifname);
803                 kfree(new_iface);
804                 return NULL;
805         }
806         spin_lock_init(&new_iface->tag_stat_list_lock);
807         new_iface->active = true;
808         new_iface->tag_stat_tree = RB_ROOT;
809
810         /*
811          * ipv6 notifier chains are atomic :(
812          * No create_proc_read_entry() for you!
813          */
814         isw = kmalloc(sizeof(*isw), GFP_ATOMIC);
815         if (!isw) {
816                 pr_err("qtaguid: iface_stat: create(%s): "
817                        "work alloc failed\n", new_iface->ifname);
818                 kfree(new_iface->ifname);
819                 kfree(new_iface);
820                 return NULL;
821         }
822         isw->iface_entry = new_iface;
823         INIT_WORK(&isw->iface_work, iface_create_proc_worker);
824         schedule_work(&isw->iface_work);
825         list_add(&new_iface->list, &iface_stat_list);
826         return new_iface;
827 }
828
829 /*
830  * Create a new entry for tracking the specified interface.
831  * Do nothing if the entry already exists.
832  * Called when an interface is configured with a valid IP address.
833  */
834 void iface_stat_create(const struct net_device *net_dev,
835                        struct in_ifaddr *ifa)
836 {
837         struct in_device *in_dev = NULL;
838         const char *ifname;
839         struct iface_stat *entry;
840         __be32 ipaddr = 0;
841         struct iface_stat *new_iface;
842
843         IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n",
844                  net_dev ? net_dev->name : "?",
845                  ifa, net_dev);
846         if (!net_dev) {
847                 pr_err("qtaguid: iface_stat: create(): no net dev\n");
848                 return;
849         }
850
851         ifname = net_dev->name;
852         if (!ifa) {
853                 in_dev = in_dev_get(net_dev);
854                 if (!in_dev) {
855                         pr_err("qtaguid: iface_stat: create(%s): no inet dev\n",
856                                ifname);
857                         return;
858                 }
859                 IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n",
860                          ifname, in_dev);
861                 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
862                         IF_DEBUG("qtaguid: iface_stat: create(%s): "
863                                  "ifa=%p ifa_label=%s\n",
864                                  ifname, ifa,
865                                  ifa->ifa_label ? ifa->ifa_label : "(null)");
866                         if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label))
867                                 break;
868                 }
869         }
870
871         if (!ifa) {
872                 IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n",
873                          ifname);
874                 goto done_put;
875         }
876         ipaddr = ifa->ifa_local;
877
878         spin_lock_bh(&iface_stat_list_lock);
879         entry = get_iface_entry(ifname);
880         if (entry != NULL) {
881                 IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n",
882                          ifname, entry);
883                 if (ipv4_is_loopback(ipaddr)) {
884                         entry->active = false;
885                         IF_DEBUG("qtaguid: iface_stat: create(%s): "
886                                  "disable tracking of loopback dev\n",
887                                  ifname);
888                 } else {
889                         entry->active = true;
890                         IF_DEBUG("qtaguid: iface_stat: create(%s): "
891                                  "enable tracking. ip=%pI4\n",
892                                  ifname, &ipaddr);
893                 }
894                 goto done_unlock_put;
895         } else if (ipv4_is_loopback(ipaddr)) {
896                 IF_DEBUG("qtaguid: iface_stat: create(%s): "
897                          "ignore loopback dev. ip=%pI4\n", ifname, &ipaddr);
898                 goto done_unlock_put;
899         }
900
901         new_iface = iface_alloc(ifname);
902         IF_DEBUG("qtaguid: iface_stat: create(%s): done "
903                  "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr);
904
905 done_unlock_put:
906         spin_unlock_bh(&iface_stat_list_lock);
907 done_put:
908         if (in_dev)
909                 in_dev_put(in_dev);
910 }
911
912 void iface_stat_create_ipv6(const struct net_device *net_dev,
913                             struct inet6_ifaddr *ifa)
914 {
915         struct in_device *in_dev;
916         const char *ifname;
917         struct iface_stat *entry;
918         struct iface_stat *new_iface;
919         int addr_type;
920
921         IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n",
922                  ifa, net_dev, net_dev ? net_dev->name : "");
923         if (!net_dev) {
924                 pr_err("qtaguid: iface_stat: create6(): no net dev!\n");
925                 return;
926         }
927         ifname = net_dev->name;
928
929         in_dev = in_dev_get(net_dev);
930         if (!in_dev) {
931                 pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n",
932                        ifname);
933                 return;
934         }
935
936         IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n",
937                  ifname, in_dev);
938
939         if (!ifa) {
940                 IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n",
941                          ifname);
942                 goto done_put;
943         }
944         addr_type = ipv6_addr_type(&ifa->addr);
945
946         spin_lock_bh(&iface_stat_list_lock);
947         entry = get_iface_entry(ifname);
948         if (entry != NULL) {
949                 IF_DEBUG("qtaguid: iface_stat: create6(%s): entry=%p\n",
950                          ifname, entry);
951                 if (addr_type & IPV6_ADDR_LOOPBACK) {
952                         entry->active = false;
953                         IF_DEBUG("qtaguid: iface_stat: create6(%s): "
954                                  "disable tracking of loopback dev\n",
955                                  ifname);
956                 } else {
957                         entry->active = true;
958                         IF_DEBUG("qtaguid: iface_stat: create6(%s): "
959                                  "enable tracking. ip=%pI6c\n",
960                                  ifname, &ifa->addr);
961                 }
962                 goto done_unlock_put;
963         } else if (addr_type & IPV6_ADDR_LOOPBACK) {
964                 IF_DEBUG("qtaguid: iface_stat: create6(%s): "
965                          "ignore loopback dev. ip=%pI6c\n",
966                          ifname, &ifa->addr);
967                 goto done_unlock_put;
968         }
969
970         new_iface = iface_alloc(ifname);
971         IF_DEBUG("qtaguid: iface_stat: create6(%s): done "
972                  "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr);
973
974 done_unlock_put:
975         spin_unlock_bh(&iface_stat_list_lock);
976 done_put:
977         in_dev_put(in_dev);
978 }
979
980 static struct sock_tag *get_sock_stat_nl(const struct sock *sk)
981 {
982         MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk);
983         return sock_tag_tree_search(&sock_tag_tree, sk);
984 }
985
986 static struct sock_tag *get_sock_stat(const struct sock *sk)
987 {
988         struct sock_tag *sock_tag_entry;
989         MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk);
990         if (!sk)
991                 return NULL;
992         spin_lock_bh(&sock_tag_list_lock);
993         sock_tag_entry = get_sock_stat_nl(sk);
994         spin_unlock_bh(&sock_tag_list_lock);
995         return sock_tag_entry;
996 }
997
998 static void
999 data_counters_update(struct data_counters *dc, int set,
1000                      enum ifs_tx_rx direction, int proto, int bytes)
1001 {
1002         switch (proto) {
1003         case IPPROTO_TCP:
1004                 dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1);
1005                 break;
1006         case IPPROTO_UDP:
1007                 dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1);
1008                 break;
1009         case IPPROTO_IP:
1010         default:
1011                 dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes,
1012                                     1);
1013                 break;
1014         }
1015 }
1016
1017 /*
1018  * Update stats for the specified interface. Do nothing if the entry
1019  * does not exist (when a device was never configured with an IP address).
1020  * Called when an device is being unregistered.
1021  */
1022 static void iface_stat_update(struct net_device *dev)
1023 {
1024         struct rtnl_link_stats64 dev_stats, *stats;
1025         struct iface_stat *entry;
1026
1027         stats = dev_get_stats(dev, &dev_stats);
1028         spin_lock_bh(&iface_stat_list_lock);
1029         entry = get_iface_entry(dev->name);
1030         if (entry == NULL) {
1031                 IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n",
1032                          dev->name);
1033                 spin_unlock_bh(&iface_stat_list_lock);
1034                 return;
1035         }
1036         IF_DEBUG("qtaguid: iface_stat: update(%s): entry=%p\n",
1037                  dev->name, entry);
1038         if (entry->active) {
1039                 entry->tx_bytes += stats->tx_bytes;
1040                 entry->tx_packets += stats->tx_packets;
1041                 entry->rx_bytes += stats->rx_bytes;
1042                 entry->rx_packets += stats->rx_packets;
1043                 entry->active = false;
1044                 IF_DEBUG("qtaguid: iface_stat: update(%s): "
1045                          " disable tracking. rx/tx=%llu/%llu\n",
1046                          dev->name, stats->rx_bytes, stats->tx_bytes);
1047         } else {
1048                 IF_DEBUG("qtaguid: iface_stat: update(%s): disabled\n",
1049                         dev->name);
1050         }
1051         spin_unlock_bh(&iface_stat_list_lock);
1052 }
1053
1054 static void tag_stat_update(struct tag_stat *tag_entry,
1055                         enum ifs_tx_rx direction, int proto, int bytes)
1056 {
1057         int active_set;
1058         active_set = get_active_counter_set(tag_entry->tn.tag);
1059         MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d "
1060                  "dir=%d proto=%d bytes=%d)\n",
1061                  tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag),
1062                  active_set, direction, proto, bytes);
1063         data_counters_update(&tag_entry->counters, active_set, direction,
1064                              proto, bytes);
1065         if (tag_entry->parent_counters)
1066                 data_counters_update(tag_entry->parent_counters, active_set,
1067                                      direction, proto, bytes);
1068 }
1069
1070 /*
1071  * Create a new entry for tracking the specified {acct_tag,uid_tag} within
1072  * the interface.
1073  * iface_entry->tag_stat_list_lock should be held.
1074  */
1075 static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry,
1076                                            tag_t tag)
1077 {
1078         struct tag_stat *new_tag_stat_entry = NULL;
1079         IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx"
1080                  " (uid=%u)\n", __func__,
1081                  iface_entry, tag, get_uid_from_tag(tag));
1082         new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC);
1083         if (!new_tag_stat_entry) {
1084                 pr_err("qtaguid: iface_stat: tag stat alloc failed\n");
1085                 goto done;
1086         }
1087         new_tag_stat_entry->tn.tag = tag;
1088         tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree);
1089 done:
1090         return new_tag_stat_entry;
1091 }
1092
1093 static void if_tag_stat_update(const char *ifname, uid_t uid,
1094                                const struct sock *sk, enum ifs_tx_rx direction,
1095                                int proto, int bytes)
1096 {
1097         struct tag_stat *tag_stat_entry;
1098         tag_t tag, acct_tag;
1099         tag_t uid_tag;
1100         struct data_counters *uid_tag_counters;
1101         struct sock_tag *sock_tag_entry;
1102         struct iface_stat *iface_entry;
1103         struct tag_stat *new_tag_stat;
1104         MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s "
1105                 "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n",
1106                  ifname, uid, sk, direction, proto, bytes);
1107
1108
1109         iface_entry = get_iface_entry(ifname);
1110         if (!iface_entry) {
1111                 pr_err("qtaguid: iface_stat: stat_update() %s not found\n",
1112                        ifname);
1113                 return;
1114         }
1115         /* It is ok to process data when an iface_entry is inactive */
1116
1117         MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n",
1118                  ifname, iface_entry);
1119
1120         /*
1121          * Look for a tagged sock.
1122          * It will have an acct_uid.
1123          */
1124         sock_tag_entry = get_sock_stat(sk);
1125         if (sock_tag_entry) {
1126                 tag = sock_tag_entry->tag;
1127                 acct_tag = get_atag_from_tag(tag);
1128                 uid_tag = get_utag_from_tag(tag);
1129         } else {
1130                 acct_tag = make_atag_from_value(0);
1131                 tag = combine_atag_with_uid(acct_tag, uid);
1132                 uid_tag = make_tag_from_uid(uid);
1133         }
1134         MT_DEBUG("qtaguid: iface_stat: stat_update(): "
1135                  " looking for tag=0x%llx (uid=%u) in ife=%p\n",
1136                  tag, get_uid_from_tag(tag), iface_entry);
1137         /* Loop over tag list under this interface for {acct_tag,uid_tag} */
1138         spin_lock_bh(&iface_entry->tag_stat_list_lock);
1139
1140         tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
1141                                               tag);
1142         if (tag_stat_entry) {
1143                 /*
1144                  * Updating the {acct_tag, uid_tag} entry handles both stats:
1145                  * {0, uid_tag} will also get updated.
1146                  */
1147                 tag_stat_update(tag_stat_entry, direction, proto, bytes);
1148                 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1149                 return;
1150         }
1151
1152         /* Loop over tag list under this interface for {0,uid_tag} */
1153         tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
1154                                               uid_tag);
1155         if (!tag_stat_entry) {
1156                 /* Here: the base uid_tag did not exist */
1157                 /*
1158                  * No parent counters. So
1159                  *  - No {0, uid_tag} stats and no {acc_tag, uid_tag} stats.
1160                  */
1161                 new_tag_stat = create_if_tag_stat(iface_entry, uid_tag);
1162                 uid_tag_counters = &new_tag_stat->counters;
1163         } else {
1164                 uid_tag_counters = &tag_stat_entry->counters;
1165         }
1166
1167         if (acct_tag) {
1168                 new_tag_stat = create_if_tag_stat(iface_entry, tag);
1169                 new_tag_stat->parent_counters = uid_tag_counters;
1170         }
1171         spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1172         tag_stat_update(new_tag_stat, direction, proto, bytes);
1173 }
1174
1175 static int iface_netdev_event_handler(struct notifier_block *nb,
1176                                       unsigned long event, void *ptr) {
1177         struct net_device *dev = ptr;
1178
1179         if (unlikely(module_passive))
1180                 return NOTIFY_DONE;
1181
1182         IF_DEBUG("qtaguid: iface_stat: netdev_event(): "
1183                  "ev=0x%lx netdev=%p->name=%s\n",
1184                  event, dev, dev ? dev->name : "");
1185
1186         switch (event) {
1187         case NETDEV_UP:
1188                 iface_stat_create(dev, NULL);
1189                 break;
1190         case NETDEV_DOWN:
1191                 iface_stat_update(dev);
1192                 break;
1193         }
1194         return NOTIFY_DONE;
1195 }
1196
1197 static int iface_inet6addr_event_handler(struct notifier_block *nb,
1198                                          unsigned long event, void *ptr)
1199 {
1200         struct inet6_ifaddr *ifa = ptr;
1201         struct net_device *dev;
1202
1203         if (unlikely(module_passive))
1204                 return NOTIFY_DONE;
1205
1206         IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): "
1207                  "ev=0x%lx ifa=%p\n",
1208                  event, ifa);
1209
1210         switch (event) {
1211         case NETDEV_UP:
1212                 BUG_ON(!ifa || !ifa->idev);
1213                 dev = (struct net_device *)ifa->idev->dev;
1214                 iface_stat_create_ipv6(dev, ifa);
1215                 atomic64_inc(&qtu_events.iface_events);
1216                 break;
1217         case NETDEV_DOWN:
1218                 BUG_ON(!ifa || !ifa->idev);
1219                 dev = (struct net_device *)ifa->idev->dev;
1220                 iface_stat_update(dev);
1221                 atomic64_inc(&qtu_events.iface_events);
1222                 break;
1223         }
1224         return NOTIFY_DONE;
1225 }
1226
1227 static int iface_inetaddr_event_handler(struct notifier_block *nb,
1228                                         unsigned long event, void *ptr)
1229 {
1230         struct in_ifaddr *ifa = ptr;
1231         struct net_device *dev;
1232
1233         if (unlikely(module_passive))
1234                 return NOTIFY_DONE;
1235
1236         IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): "
1237                  "ev=0x%lx ifa=%p\n",
1238                  event, ifa);
1239
1240         switch (event) {
1241         case NETDEV_UP:
1242                 BUG_ON(!ifa || !ifa->ifa_dev);
1243                 dev = ifa->ifa_dev->dev;
1244                 iface_stat_create(dev, ifa);
1245                 atomic64_inc(&qtu_events.iface_events);
1246                 break;
1247         case NETDEV_DOWN:
1248                 BUG_ON(!ifa || !ifa->ifa_dev);
1249                 dev = ifa->ifa_dev->dev;
1250                 iface_stat_update(dev);
1251                 atomic64_inc(&qtu_events.iface_events);
1252                 break;
1253         }
1254         return NOTIFY_DONE;
1255 }
1256
1257 static struct notifier_block iface_netdev_notifier_blk = {
1258         .notifier_call = iface_netdev_event_handler,
1259 };
1260
1261 static struct notifier_block iface_inetaddr_notifier_blk = {
1262         .notifier_call = iface_inetaddr_event_handler,
1263 };
1264
1265 static struct notifier_block iface_inet6addr_notifier_blk = {
1266         .notifier_call = iface_inet6addr_event_handler,
1267 };
1268
1269 static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
1270 {
1271         int err;
1272
1273         iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir);
1274         if (!iface_stat_procdir) {
1275                 pr_err("qtaguid: iface_stat: init failed to create proc entry\n");
1276                 err = -1;
1277                 goto err;
1278         }
1279         err = register_netdevice_notifier(&iface_netdev_notifier_blk);
1280         if (err) {
1281                 pr_err("qtaguid: iface_stat: init "
1282                        "failed to register dev event handler\n");
1283                 goto err_zap_entry;
1284         }
1285         err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1286         if (err) {
1287                 pr_err("qtaguid: iface_stat: init "
1288                        "failed to register ipv4 dev event handler\n");
1289                 goto err_unreg_nd;
1290         }
1291
1292         err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk);
1293         if (err) {
1294                 pr_err("qtaguid: iface_stat: init "
1295                        "failed to register ipv6 dev event handler\n");
1296                 goto err_unreg_ip4_addr;
1297         }
1298         return 0;
1299
1300 err_unreg_ip4_addr:
1301         unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1302 err_unreg_nd:
1303         unregister_netdevice_notifier(&iface_netdev_notifier_blk);
1304 err_zap_entry:
1305         remove_proc_entry(iface_stat_procdirname, parent_procdir);
1306 err:
1307         return err;
1308 }
1309
1310 static struct sock *qtaguid_find_sk(const struct sk_buff *skb,
1311                                     struct xt_action_param *par)
1312 {
1313         struct sock *sk;
1314         unsigned int hook_mask = (1 << par->hooknum);
1315
1316         MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb,
1317                  par->hooknum, par->family);
1318
1319         /*
1320          * Let's not abuse the the xt_socket_get*_sk(), or else it will
1321          * return garbage SKs.
1322          */
1323         if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS))
1324                 return NULL;
1325
1326         switch (par->family) {
1327         case NFPROTO_IPV6:
1328                 sk = xt_socket_get6_sk(skb, par);
1329                 break;
1330         case NFPROTO_IPV4:
1331                 sk = xt_socket_get4_sk(skb, par);
1332                 break;
1333         default:
1334                 return NULL;
1335         }
1336
1337         /*
1338          * Seems to be issues on the file ptr for TCP_TIME_WAIT SKs.
1339          * http://kerneltrap.org/mailarchive/linux-netdev/2010/10/21/6287959
1340          * Not fixed in 3.0-r3 :(
1341          */
1342         if (sk) {
1343                 MT_DEBUG("qtaguid: %p->sk_proto=%u "
1344                          "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state);
1345                 if (sk->sk_state  == TCP_TIME_WAIT) {
1346                         xt_socket_put_sk(sk);
1347                         sk = NULL;
1348                 }
1349         }
1350         return sk;
1351 }
1352
1353 static void account_for_uid(const struct sk_buff *skb,
1354                             const struct sock *alternate_sk, uid_t uid,
1355                             struct xt_action_param *par)
1356 {
1357         const struct net_device *el_dev;
1358
1359         if (!skb->dev) {
1360                 MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
1361                 el_dev = par->in ? : par->out;
1362         } else {
1363                 const struct net_device *other_dev;
1364                 el_dev = skb->dev;
1365                 other_dev = par->in ? : par->out;
1366                 if (el_dev != other_dev) {
1367                         MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
1368                                 "par->(in/out)=%p %s\n",
1369                                 par->hooknum, el_dev, el_dev->name, other_dev,
1370                                 other_dev->name);
1371                 }
1372         }
1373
1374         if (unlikely(!el_dev)) {
1375                 pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum);
1376         } else if (unlikely(!el_dev->name)) {
1377                 pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum);
1378         } else {
1379                 MT_DEBUG("qtaguid[%d]: dev name=%s type=%d\n",
1380                          par->hooknum,
1381                          el_dev->name,
1382                          el_dev->type);
1383
1384                 if_tag_stat_update(el_dev->name, uid,
1385                                 skb->sk ? skb->sk : alternate_sk,
1386                                 par->in ? IFS_RX : IFS_TX,
1387                                 ip_hdr(skb)->protocol, skb->len);
1388         }
1389 }
1390
1391 static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)
1392 {
1393         const struct xt_qtaguid_match_info *info = par->matchinfo;
1394         const struct file *filp;
1395         bool got_sock = false;
1396         struct sock *sk;
1397         uid_t sock_uid;
1398         bool res;
1399
1400         if (unlikely(module_passive))
1401                 return (info->match ^ info->invert) == 0;
1402
1403         MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n",
1404                  par->hooknum, skb, par->in, par->out, par->family);
1405
1406         if (skb == NULL) {
1407                 res = (info->match ^ info->invert) == 0;
1408                 goto ret_res;
1409         }
1410
1411         sk = skb->sk;
1412
1413         if (sk == NULL) {
1414                 /*
1415                  * A missing sk->sk_socket happens when packets are in-flight
1416                  * and the matching socket is already closed and gone.
1417                  */
1418                 sk = qtaguid_find_sk(skb, par);
1419                 /*
1420                  * If we got the socket from the find_sk(), we will need to put
1421                  * it back, as nf_tproxy_get_sock_v4() got it.
1422                  */
1423                 got_sock = sk;
1424                 if (sk)
1425                         atomic64_inc(&qtu_events.match_found_sk_in_ct);
1426         } else {
1427                 atomic64_inc(&qtu_events.match_found_sk);
1428         }
1429         MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d proto=%d\n",
1430                 par->hooknum, sk, got_sock, ip_hdr(skb)->protocol);
1431         if (sk != NULL) {
1432                 MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n",
1433                         par->hooknum, sk, sk->sk_socket,
1434                         sk->sk_socket ? sk->sk_socket->file : (void *)-1LL);
1435                 filp = sk->sk_socket ? sk->sk_socket->file : NULL;
1436                 MT_DEBUG("qtaguid[%d]: filp...uid=%u\n",
1437                         par->hooknum, filp ? filp->f_cred->fsuid : -1);
1438         }
1439
1440         if (sk == NULL || sk->sk_socket == NULL) {
1441                 /*
1442                  * Here, the qtaguid_find_sk() using connection tracking
1443                  * couldn't find the owner, so for now we just count them
1444                  * against the system.
1445                  */
1446                 /*
1447                  * TODO: unhack how to force just accounting.
1448                  * For now we only do iface stats when the uid-owner is not
1449                  * requested.
1450                  */
1451                 if (!(info->match & XT_QTAGUID_UID))
1452                         account_for_uid(skb, sk, 0, par);
1453                 MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n",
1454                         par->hooknum,
1455                         sk ? sk->sk_socket : NULL);
1456                 res = (info->match ^ info->invert) == 0;
1457                 atomic64_inc(&qtu_events.match_found_sk_none);
1458                 goto put_sock_ret_res;
1459         } else if (info->match & info->invert & XT_QTAGUID_SOCKET) {
1460                 res = false;
1461                 goto put_sock_ret_res;
1462         }
1463         filp = sk->sk_socket->file;
1464         if (filp == NULL) {
1465                 MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum);
1466                 res = ((info->match ^ info->invert) &
1467                         (XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0;
1468                 goto put_sock_ret_res;
1469         }
1470         sock_uid = filp->f_cred->fsuid;
1471         /*
1472          * TODO: unhack how to force just accounting.
1473          * For now we only do iface stats when the uid-owner is not requested
1474          */
1475         if (!(info->match & XT_QTAGUID_UID))
1476                 account_for_uid(skb, sk, sock_uid, par);
1477
1478         /*
1479          * The following two tests fail the match when:
1480          *    id not in range AND no inverted condition requested
1481          * or id     in range AND    inverted condition requested
1482          * Thus (!a && b) || (a && !b) == a ^ b
1483          */
1484         if (info->match & XT_QTAGUID_UID)
1485                 if ((filp->f_cred->fsuid >= info->uid_min &&
1486                      filp->f_cred->fsuid <= info->uid_max) ^
1487                     !(info->invert & XT_QTAGUID_UID)) {
1488                         MT_DEBUG("qtaguid[%d]: leaving uid not matching\n",
1489                                  par->hooknum);
1490                         res = false;
1491                         goto put_sock_ret_res;
1492                 }
1493         if (info->match & XT_QTAGUID_GID)
1494                 if ((filp->f_cred->fsgid >= info->gid_min &&
1495                                 filp->f_cred->fsgid <= info->gid_max) ^
1496                         !(info->invert & XT_QTAGUID_GID)) {
1497                         MT_DEBUG("qtaguid[%d]: leaving gid not matching\n",
1498                                 par->hooknum);
1499                         res = false;
1500                         goto put_sock_ret_res;
1501                 }
1502
1503         MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum);
1504         res = true;
1505
1506 put_sock_ret_res:
1507         if (got_sock)
1508                 xt_socket_put_sk(sk);
1509 ret_res:
1510         MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res);
1511         return res;
1512 }
1513
1514 /*
1515  * Procfs reader to get all active socket tags using style "1)" as described in
1516  * fs/proc/generic.c
1517  */
1518 static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned,
1519                                   off_t items_to_skip, int char_count, int *eof,
1520                                   void *data)
1521 {
1522         char *outp = page;
1523         int len;
1524         uid_t uid;
1525         struct rb_node *node;
1526         struct sock_tag *sock_tag_entry;
1527         int item_index = 0;
1528         int indent_level = 0;
1529         long f_count;
1530
1531         if (unlikely(module_passive)) {
1532                 *eof = 1;
1533                 return 0;
1534         }
1535
1536         if (*eof)
1537                 return 0;
1538
1539         CT_DEBUG("qtaguid: proc ctrl page=%p off=%ld char_count=%d *eof=%d\n",
1540                 page, items_to_skip, char_count, *eof);
1541
1542         spin_lock_bh(&sock_tag_list_lock);
1543         for (node = rb_first(&sock_tag_tree);
1544              node;
1545              node = rb_next(node)) {
1546                 if (item_index++ < items_to_skip)
1547                         continue;
1548                 sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
1549                 uid = get_uid_from_tag(sock_tag_entry->tag);
1550                 CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) "
1551                          "pid=%u\n",
1552                          sock_tag_entry->sk,
1553                          sock_tag_entry->tag,
1554                          uid,
1555                          sock_tag_entry->pid
1556                         );
1557                 f_count = atomic_long_read(
1558                         &sock_tag_entry->socket->file->f_count);
1559                 len = snprintf(outp, char_count,
1560                                "sock=%p tag=0x%llx (uid=%u) pid=%u "
1561                                "f_count=%lu\n",
1562                                sock_tag_entry->sk,
1563                                sock_tag_entry->tag, uid,
1564                                sock_tag_entry->pid, f_count);
1565                 if (len >= char_count) {
1566                         spin_unlock_bh(&sock_tag_list_lock);
1567                         *outp = '\0';
1568                         return outp - page;
1569                 }
1570                 outp += len;
1571                 char_count -= len;
1572                 (*num_items_returned)++;
1573         }
1574         spin_unlock_bh(&sock_tag_list_lock);
1575
1576         if (item_index++ >= items_to_skip) {
1577                 len = snprintf(outp, char_count,
1578                                "events: sockets_tagged=%llu "
1579                                "sockets_untagged=%llu "
1580                                "counter_set_changes=%llu "
1581                                "delete_cmds=%llu "
1582                                "iface_events=%llu "
1583                                "match_found_sk=%llu "
1584                                "match_found_sk_in_ct=%llu "
1585                                "match_found_sk_none=%llu\n",
1586                                atomic64_read(&qtu_events.sockets_tagged),
1587                                atomic64_read(&qtu_events.sockets_untagged),
1588                                atomic64_read(&qtu_events.counter_set_changes),
1589                                atomic64_read(&qtu_events.delete_cmds),
1590                                atomic64_read(&qtu_events.iface_events),
1591                                atomic64_read(&qtu_events.match_found_sk),
1592                                atomic64_read(&qtu_events.match_found_sk_in_ct),
1593                                atomic64_read(&qtu_events.match_found_sk_none));
1594                 if (len >= char_count) {
1595                         *outp = '\0';
1596                         return outp - page;
1597                 }
1598                 outp += len;
1599                 char_count -= len;
1600                 (*num_items_returned)++;
1601         }
1602
1603 #ifdef CDEBUG
1604         /* Count the following as part of the last item_index */
1605         if (item_index > items_to_skip) {
1606                 CT_DEBUG("qtaguid: proc ctrl state debug {\n");
1607                 spin_lock_bh(&sock_tag_list_lock);
1608                 prdebug_sock_tag_tree(indent_level, &sock_tag_tree);
1609                 spin_unlock_bh(&sock_tag_list_lock);
1610
1611                 spin_lock_bh(&uid_tag_data_tree_lock);
1612                 prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree);
1613                 prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree);
1614                 spin_unlock_bh(&uid_tag_data_tree_lock);
1615
1616                 spin_lock_bh(&iface_stat_list_lock);
1617                 prdebug_iface_stat_list(indent_level, &iface_stat_list);
1618                 spin_unlock_bh(&iface_stat_list_lock);
1619
1620                 CT_DEBUG("qtaguid: proc ctrl state debug }\n");
1621
1622
1623         }
1624 #endif
1625
1626         *eof = 1;
1627         return outp - page;
1628 }
1629
1630 /*
1631  * Delete socket tags, and stat tags associated with a given
1632  * accouting tag and uid.
1633  */
1634 static int ctrl_cmd_delete(const char *input)
1635 {
1636         char cmd;
1637         uid_t uid;
1638         uid_t entry_uid;
1639         tag_t acct_tag;
1640         tag_t tag;
1641         int res, argc;
1642         struct iface_stat *iface_entry;
1643         struct rb_node *node;
1644         struct sock_tag *st_entry;
1645         struct rb_root st_to_free_tree = RB_ROOT;
1646         struct tag_stat *ts_entry;
1647         struct tag_counter_set *tcs_entry;
1648         struct tag_ref *tr_entry;
1649         struct uid_tag_data *utd_entry;
1650
1651         argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid);
1652         CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c "
1653                  "user_tag=0x%llx uid=%u\n", input, argc, cmd,
1654                  acct_tag, uid);
1655         if (argc < 2) {
1656                 res = -EINVAL;
1657                 goto err;
1658         }
1659         if (!valid_atag(acct_tag)) {
1660                 pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input);
1661                 res = -EINVAL;
1662                 goto err;
1663         }
1664         if (argc < 3) {
1665                 uid = current_fsuid();
1666         } else if (!can_impersonate_uid(uid)) {
1667                 pr_info("qtaguid: ctrl_delete(%s): "
1668                         "insufficient priv from pid=%u tgid=%u uid=%u\n",
1669                         input, current->pid, current->tgid, current_fsuid());
1670                 res = -EPERM;
1671                 goto err;
1672         }
1673
1674         tag = combine_atag_with_uid(acct_tag, uid);
1675         CT_DEBUG("qtaguid: ctrl_delete(): "
1676                  "looking for tag=0x%llx (uid=%u)\n",
1677                  tag, uid);
1678
1679         /* Delete socket tags */
1680         spin_lock_bh(&sock_tag_list_lock);
1681         node = rb_first(&sock_tag_tree);
1682         while (node) {
1683                 st_entry = rb_entry(node, struct sock_tag, sock_node);
1684                 entry_uid = get_uid_from_tag(st_entry->tag);
1685                 node = rb_next(node);
1686                 if (entry_uid != uid)
1687                         continue;
1688
1689                 CT_DEBUG("qtaguid: ctrl_delete(): st tag=0x%llx (uid=%u)\n",
1690                          st_entry->tag, entry_uid);
1691
1692                 if (!acct_tag || st_entry->tag == tag) {
1693                         rb_erase(&st_entry->sock_node, &sock_tag_tree);
1694                         /* Can't sockfd_put() within spinlock, do it later. */
1695                         sock_tag_tree_insert(st_entry, &st_to_free_tree);
1696                         tr_entry = lookup_tag_ref(st_entry->tag, NULL);
1697                         BUG_ON(tr_entry->num_sock_tags <= 0);
1698                         tr_entry->num_sock_tags--;
1699                 }
1700         }
1701         spin_unlock_bh(&sock_tag_list_lock);
1702
1703         sock_tag_tree_erase(&st_to_free_tree);
1704
1705         /* Delete tag counter-sets */
1706         spin_lock_bh(&tag_counter_set_list_lock);
1707         /* Counter sets are only on the uid tag, not full tag */
1708         tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
1709         if (tcs_entry) {
1710                 CT_DEBUG("qtaguid: ctrl_delete(): "
1711                          "erase tcs: tag=0x%llx (uid=%u) set=%d\n",
1712                          tcs_entry->tn.tag,
1713                          get_uid_from_tag(tcs_entry->tn.tag),
1714                          tcs_entry->active_set);
1715                 rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree);
1716                 kfree(tcs_entry);
1717         }
1718         spin_unlock_bh(&tag_counter_set_list_lock);
1719
1720         /*
1721          * If acct_tag is 0, then all entries belonging to uid are
1722          * erased.
1723          */
1724         spin_lock_bh(&iface_stat_list_lock);
1725         list_for_each_entry(iface_entry, &iface_stat_list, list) {
1726                 spin_lock_bh(&iface_entry->tag_stat_list_lock);
1727                 node = rb_first(&iface_entry->tag_stat_tree);
1728                 while (node) {
1729                         ts_entry = rb_entry(node, struct tag_stat, tn.node);
1730                         entry_uid = get_uid_from_tag(ts_entry->tn.tag);
1731                         node = rb_next(node);
1732
1733                         CT_DEBUG("qtaguid: ctrl_delete(): "
1734                                  "ts tag=0x%llx (uid=%u)\n",
1735                                  ts_entry->tn.tag, entry_uid);
1736
1737                         if (entry_uid != uid)
1738                                 continue;
1739                         if (!acct_tag || ts_entry->tn.tag == tag) {
1740                                 CT_DEBUG("qtaguid: ctrl_delete(): "
1741                                          "erase ts: %s 0x%llx %u\n",
1742                                          iface_entry->ifname,
1743                                          get_atag_from_tag(ts_entry->tn.tag),
1744                                          entry_uid);
1745                                 rb_erase(&ts_entry->tn.node,
1746                                          &iface_entry->tag_stat_tree);
1747                                 kfree(ts_entry);
1748                         }
1749                 }
1750                 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1751         }
1752         spin_unlock_bh(&iface_stat_list_lock);
1753
1754         /* Cleanup the uid_tag_data */
1755         spin_lock_bh(&uid_tag_data_tree_lock);
1756         node = rb_first(&uid_tag_data_tree);
1757         while (node) {
1758                 utd_entry = rb_entry(node, struct uid_tag_data, node);
1759                 entry_uid = utd_entry->uid;
1760                 node = rb_next(node);
1761
1762                 CT_DEBUG("qtaguid: ctrl_delete(): "
1763                          "utd uid=%u\n",
1764                          entry_uid);
1765
1766                 if (entry_uid != uid)
1767                         continue;
1768                 /*
1769                  * Go over the tag_refs, and those that don't have
1770                  * sock_tags using them are freed.
1771                  */
1772                 put_tag_ref_tree(tag, utd_entry);
1773                 put_utd_entry(utd_entry);
1774         }
1775         spin_unlock_bh(&uid_tag_data_tree_lock);
1776
1777         atomic64_inc(&qtu_events.delete_cmds);
1778         res = 0;
1779
1780 err:
1781         return res;
1782 }
1783
1784 static int ctrl_cmd_counter_set(const char *input)
1785 {
1786         char cmd;
1787         uid_t uid = 0;
1788         tag_t tag;
1789         int res, argc;
1790         struct tag_counter_set *tcs;
1791         int counter_set;
1792
1793         argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid);
1794         CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c "
1795                  "set=%d uid=%u\n", input, argc, cmd,
1796                  counter_set, uid);
1797         if (argc != 3) {
1798                 res = -EINVAL;
1799                 goto err;
1800         }
1801         if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) {
1802                 pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n",
1803                         input);
1804                 res = -EINVAL;
1805                 goto err;
1806         }
1807         if (!can_manipulate_uids()) {
1808                 pr_info("qtaguid: ctrl_counterset(%s): "
1809                         "insufficient priv from pid=%u tgid=%u uid=%u\n",
1810                         input, current->pid, current->tgid, current_fsuid());
1811                 res = -EPERM;
1812                 goto err;
1813         }
1814
1815         tag = make_tag_from_uid(uid);
1816         spin_lock_bh(&tag_counter_set_list_lock);
1817         tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
1818         if (!tcs) {
1819                 tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC);
1820                 if (!tcs) {
1821                         spin_unlock_bh(&tag_counter_set_list_lock);
1822                         pr_err("qtaguid: ctrl_counterset(%s): "
1823                                "failed to alloc counter set\n",
1824                                input);
1825                         res = -ENOMEM;
1826                         goto err;
1827                 }
1828                 tcs->tn.tag = tag;
1829                 tag_counter_set_tree_insert(tcs, &tag_counter_set_tree);
1830                 CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx "
1831                          "(uid=%u) set=%d\n",
1832                          input, tag, get_uid_from_tag(tag), counter_set);
1833         }
1834         tcs->active_set = counter_set;
1835         spin_unlock_bh(&tag_counter_set_list_lock);
1836         atomic64_inc(&qtu_events.counter_set_changes);
1837         res = 0;
1838
1839 err:
1840         return res;
1841 }
1842
1843 static int ctrl_cmd_tag(const char *input)
1844 {
1845         char cmd;
1846         int sock_fd = 0;
1847         uid_t uid = 0;
1848         tag_t acct_tag = make_atag_from_value(0);
1849         tag_t full_tag;
1850         struct socket *el_socket;
1851         int res, argc;
1852         struct sock_tag *sock_tag_entry;
1853         struct tag_ref *tag_ref_entry;
1854         struct uid_tag_data *uid_tag_data_entry;
1855         struct proc_qtu_data *pqd_entry;
1856
1857         /* Unassigned args will get defaulted later. */
1858         argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid);
1859         CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d "
1860                  "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd,
1861                  acct_tag, uid);
1862         if (argc < 2) {
1863                 res = -EINVAL;
1864                 goto err;
1865         }
1866         el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
1867         if (!el_socket) {
1868                 pr_info("qtaguid: ctrl_tag(%s): failed to lookup"
1869                         " sock_fd=%d err=%d\n", input, sock_fd, res);
1870                 goto err;
1871         }
1872         CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n",
1873                  input, atomic_long_read(&el_socket->file->f_count),
1874                  el_socket->sk);
1875         if (argc < 3) {
1876                 acct_tag = make_atag_from_value(0);
1877         } else if (!valid_atag(acct_tag)) {
1878                 pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input);
1879                 res = -EINVAL;
1880                 goto err_put;
1881         }
1882         CT_DEBUG("qtaguid: ctrl_tag(%s): "
1883                  "pid=%u tgid=%u uid=%u euid=%u fsuid=%u "
1884                  "in_group=%d in_egroup=%d\n",
1885                  input, current->pid, current->tgid, current_uid(),
1886                  current_euid(), current_fsuid(),
1887                  in_group_p(proc_ctrl_write_gid),
1888                  in_egroup_p(proc_ctrl_write_gid));
1889         if (argc < 4) {
1890                 uid = current_fsuid();
1891         } else if (!can_impersonate_uid(uid)) {
1892                 pr_info("qtaguid: ctrl_tag(%s): "
1893                         "insufficient priv from pid=%u tgid=%u uid=%u\n",
1894                         input, current->pid, current->tgid, current_fsuid());
1895                 res = -EPERM;
1896                 goto err_put;
1897         }
1898         full_tag = combine_atag_with_uid(acct_tag, uid);
1899
1900         spin_lock_bh(&sock_tag_list_lock);
1901         sock_tag_entry = get_sock_stat_nl(el_socket->sk);
1902         tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry);
1903         if (IS_ERR(tag_ref_entry)) {
1904                 res = PTR_ERR(tag_ref_entry);
1905                 spin_unlock_bh(&sock_tag_list_lock);
1906                 goto err_put;
1907         }
1908         tag_ref_entry->num_sock_tags++;
1909         if (sock_tag_entry) {
1910                 struct tag_ref *prev_tag_ref_entry;
1911
1912                 CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p "
1913                          "st@%p ...->f_count=%ld\n",
1914                          input, el_socket->sk, sock_tag_entry,
1915                          atomic_long_read(&el_socket->file->f_count));
1916                 /*
1917                  * This is a re-tagging, so release the sock_fd that was
1918                  * locked at the time of the 1st tagging.
1919                  * There is still the ref from this call's sockfd_lookup() so
1920                  * it can be done within the spinlock.
1921                  */
1922                 sockfd_put(sock_tag_entry->socket);
1923                 prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag,
1924                                                     &uid_tag_data_entry);
1925                 BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry));
1926                 BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0);
1927                 prev_tag_ref_entry->num_sock_tags--;
1928                 sock_tag_entry->tag = full_tag;
1929         } else {
1930                 CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n",
1931                          input, el_socket->sk);
1932                 sock_tag_entry = kzalloc(sizeof(*sock_tag_entry),
1933                                          GFP_ATOMIC);
1934                 if (!sock_tag_entry) {
1935                         pr_err("qtaguid: ctrl_tag(%s): "
1936                                "socket tag alloc failed\n",
1937                                input);
1938                         spin_unlock_bh(&sock_tag_list_lock);
1939                         res = -ENOMEM;
1940                         goto err_tag_unref_put;
1941                 }
1942                 sock_tag_entry->sk = el_socket->sk;
1943                 sock_tag_entry->socket = el_socket;
1944                 sock_tag_entry->pid = current->tgid;
1945                 sock_tag_entry->tag = combine_atag_with_uid(acct_tag,
1946                                                             uid);
1947                 spin_lock_bh(&uid_tag_data_tree_lock);
1948                 pqd_entry = proc_qtu_data_tree_search(
1949                         &proc_qtu_data_tree, current->tgid);
1950                 /* TODO: remove if() test, do BUG_ON() */
1951                 WARN_ON(IS_ERR_OR_NULL(pqd_entry));
1952                 if (!IS_ERR_OR_NULL(pqd_entry)) {
1953                         list_add(&sock_tag_entry->list,
1954                                  &pqd_entry->sock_tag_list);
1955                 }
1956                 spin_unlock_bh(&uid_tag_data_tree_lock);
1957
1958                 sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree);
1959                 atomic64_inc(&qtu_events.sockets_tagged);
1960         }
1961         spin_unlock_bh(&sock_tag_list_lock);
1962         /* We keep the ref to the socket (file) until it is untagged */
1963         CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->f_count=%ld\n",
1964                  input, sock_tag_entry,
1965                  atomic_long_read(&el_socket->file->f_count));
1966         return 0;
1967
1968 err_tag_unref_put:
1969         BUG_ON(tag_ref_entry->num_sock_tags <= 0);
1970         tag_ref_entry->num_sock_tags--;
1971         free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry);
1972 err_put:
1973         CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->f_count=%ld\n",
1974                  input, atomic_long_read(&el_socket->file->f_count) - 1);
1975         /* Release the sock_fd that was grabbed by sockfd_lookup(). */
1976         sockfd_put(el_socket);
1977         return res;
1978
1979 err:
1980         CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input);
1981         return res;
1982 }
1983
1984 static int ctrl_cmd_untag(const char *input)
1985 {
1986         char cmd;
1987         int sock_fd = 0;
1988         struct socket *el_socket;
1989         int res, argc;
1990         struct sock_tag *sock_tag_entry;
1991         struct tag_ref *tag_ref_entry;
1992         struct uid_tag_data *utd_entry;
1993         struct proc_qtu_data *pqd_entry;
1994
1995         argc = sscanf(input, "%c %d", &cmd, &sock_fd);
1996         CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n",
1997                  input, argc, cmd, sock_fd);
1998         if (argc < 2) {
1999                 res = -EINVAL;
2000                 goto err;
2001         }
2002         el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
2003         if (!el_socket) {
2004                 pr_info("qtaguid: ctrl_untag(%s): failed to lookup"
2005                         " sock_fd=%d err=%d\n", input, sock_fd, res);
2006                 goto err;
2007         }
2008         CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n",
2009                  input, atomic_long_read(&el_socket->file->f_count),
2010                  el_socket->sk);
2011         spin_lock_bh(&sock_tag_list_lock);
2012         sock_tag_entry = get_sock_stat_nl(el_socket->sk);
2013         if (!sock_tag_entry) {
2014                 spin_unlock_bh(&sock_tag_list_lock);
2015                 res = -EINVAL;
2016                 goto err_put;
2017         }
2018         /*
2019          * The socket already belongs to the current process
2020          * so it can do whatever it wants to it.
2021          */
2022         rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree);
2023
2024         tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, &utd_entry);
2025         BUG_ON(!tag_ref_entry);
2026         BUG_ON(tag_ref_entry->num_sock_tags <= 0);
2027         spin_lock_bh(&uid_tag_data_tree_lock);
2028         pqd_entry = proc_qtu_data_tree_search(
2029                 &proc_qtu_data_tree, current->tgid);
2030         /* TODO: remove if() test, do BUG_ON() */
2031         WARN_ON(IS_ERR_OR_NULL(pqd_entry));
2032         if (!IS_ERR_OR_NULL(pqd_entry))
2033                 list_del(&sock_tag_entry->list);
2034         spin_unlock_bh(&uid_tag_data_tree_lock);
2035         /*
2036          * We don't free tag_ref from the utd_entry here,
2037          * only during a cmd_delete().
2038          */
2039         tag_ref_entry->num_sock_tags--;
2040         spin_unlock_bh(&sock_tag_list_lock);
2041         /*
2042          * Release the sock_fd that was grabbed at tag time,
2043          * and once more for the sockfd_lookup() here.
2044          */
2045         sockfd_put(sock_tag_entry->socket);
2046         CT_DEBUG("qtaguid: ctrl_untag(%s): done. st@%p ...->f_count=%ld\n",
2047                  input, sock_tag_entry,
2048                  atomic_long_read(&el_socket->file->f_count) - 1);
2049         sockfd_put(el_socket);
2050
2051         kfree(sock_tag_entry);
2052         atomic64_inc(&qtu_events.sockets_untagged);
2053
2054         return 0;
2055
2056 err_put:
2057         CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%ld\n",
2058                  input, atomic_long_read(&el_socket->file->f_count) - 1);
2059         /* Release the sock_fd that was grabbed by sockfd_lookup(). */
2060         sockfd_put(el_socket);
2061         return res;
2062
2063 err:
2064         CT_DEBUG("qtaguid: ctrl_untag(%s): done.\n", input);
2065         return res;
2066 }
2067
2068 static int qtaguid_ctrl_parse(const char *input, int count)
2069 {
2070         char cmd;
2071         int res;
2072
2073         cmd = input[0];
2074         /* Collect params for commands */
2075         switch (cmd) {
2076         case 'd':
2077                 res = ctrl_cmd_delete(input);
2078                 break;
2079
2080         case 's':
2081                 res = ctrl_cmd_counter_set(input);
2082                 break;
2083
2084         case 't':
2085                 res = ctrl_cmd_tag(input);
2086                 break;
2087
2088         case 'u':
2089                 res = ctrl_cmd_untag(input);
2090                 break;
2091
2092         default:
2093                 res = -EINVAL;
2094                 goto err;
2095         }
2096         if (!res)
2097                 res = count;
2098 err:
2099         CT_DEBUG("qtaguid: ctrl(%s): res=%d\n", input, res);
2100         return res;
2101 }
2102
2103 #define MAX_QTAGUID_CTRL_INPUT_LEN 255
2104 static int qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer,
2105                         unsigned long count, void *data)
2106 {
2107         char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN];
2108
2109         if (unlikely(module_passive))
2110                 return count;
2111
2112         if (count >= MAX_QTAGUID_CTRL_INPUT_LEN)
2113                 return -EINVAL;
2114
2115         if (copy_from_user(input_buf, buffer, count))
2116                 return -EFAULT;
2117
2118         input_buf[count] = '\0';
2119         return qtaguid_ctrl_parse(input_buf, count);
2120 }
2121
/* Cursor and bookkeeping shared by the stats proc printing helpers. */
struct proc_print_info {
	/* Output cursor: where the next line is written, and space left. */
	char *outp;
	int char_count;

	/* Item accounting: items visited so far vs. items to pass over. */
	int item_index;
	int items_to_skip;
	/* Incremented (through the pointer) once per line emitted. */
	char **num_items_returned;

	/* Interface and tag stat currently being printed. */
	struct iface_stat *iface_entry;
	struct tag_stat *ts_entry;
};
2131
2132 static int pp_stats_line(struct proc_print_info *ppi, int cnt_set)
2133 {
2134         int len;
2135         struct data_counters *cnts;
2136
2137         if (!ppi->item_index) {
2138                 if (ppi->item_index++ < ppi->items_to_skip)
2139                         return 0;
2140                 len = snprintf(ppi->outp, ppi->char_count,
2141                                "idx iface acct_tag_hex uid_tag_int cnt_set "
2142                                "rx_bytes rx_packets "
2143                                "tx_bytes tx_packets "
2144                                "rx_tcp_packets rx_tcp_bytes "
2145                                "rx_udp_packets rx_udp_bytes "
2146                                "rx_other_packets rx_other_bytes "
2147                                "tx_tcp_packets tx_tcp_bytes "
2148                                "tx_udp_packets tx_udp_bytes "
2149                                "tx_other_packets tx_other_bytes\n");
2150         } else {
2151                 tag_t tag = ppi->ts_entry->tn.tag;
2152                 uid_t stat_uid = get_uid_from_tag(tag);
2153
2154                 if (!can_read_other_uid_stats(stat_uid)) {
2155                         CT_DEBUG("qtaguid: stats line: "
2156                                  "%s 0x%llx %u: insufficient priv "
2157                                  "from pid=%u tgid=%u uid=%u\n",
2158                                  ppi->iface_entry->ifname,
2159                                  get_atag_from_tag(tag), stat_uid,
2160                                  current->pid, current->tgid, current_fsuid());
2161                         return 0;
2162                 }
2163                 if (ppi->item_index++ < ppi->items_to_skip)
2164                         return 0;
2165                 cnts = &ppi->ts_entry->counters;
2166                 len = snprintf(
2167                         ppi->outp, ppi->char_count,
2168                         "%d %s 0x%llx %u %u "
2169                         "%llu %llu "
2170                         "%llu %llu "
2171                         "%llu %llu "
2172                         "%llu %llu "
2173                         "%llu %llu "
2174                         "%llu %llu "
2175                         "%llu %llu "
2176                         "%llu %llu\n",
2177                         ppi->item_index,
2178                         ppi->iface_entry->ifname,
2179                         get_atag_from_tag(tag),
2180                         stat_uid,
2181                         cnt_set,
2182                         dc_sum_bytes(cnts, cnt_set, IFS_RX),
2183                         dc_sum_packets(cnts, cnt_set, IFS_RX),
2184                         dc_sum_bytes(cnts, cnt_set, IFS_TX),
2185                         dc_sum_packets(cnts, cnt_set, IFS_TX),
2186                         cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
2187                         cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
2188                         cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
2189                         cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
2190                         cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
2191                         cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
2192                         cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
2193                         cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
2194                         cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
2195                         cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
2196                         cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
2197                         cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
2198         }
2199         return len;
2200 }
2201
2202 bool pp_sets(struct proc_print_info *ppi)
2203 {
2204         int len;
2205         int counter_set;
2206         for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS;
2207              counter_set++) {
2208                 len = pp_stats_line(ppi, counter_set);
2209                 if (len >= ppi->char_count) {
2210                         *ppi->outp = '\0';
2211                         return false;
2212                 }
2213                 if (len) {
2214                         ppi->outp += len;
2215                         ppi->char_count -= len;
2216                         (*ppi->num_items_returned)++;
2217                 }
2218         }
2219         return true;
2220 }
2221
2222 /*
2223  * Procfs reader to get all tag stats using style "1)" as described in
2224  * fs/proc/generic.c
2225  * Groups all protocols tx/rx bytes.
2226  */
2227 static int qtaguid_stats_proc_read(char *page, char **num_items_returned,
2228                                 off_t items_to_skip, int char_count, int *eof,
2229                                 void *data)
2230 {
2231         struct proc_print_info ppi;
2232         int len;
2233
2234         ppi.outp = page;
2235         ppi.item_index = 0;
2236         ppi.char_count = char_count;
2237         ppi.num_items_returned = num_items_returned;
2238         ppi.items_to_skip = items_to_skip;
2239
2240         if (unlikely(module_passive)) {
2241                 len = pp_stats_line(&ppi, 0);
2242                 /* The header should always be shorter than the buffer. */
2243                 BUG_ON(len >= ppi.char_count);
2244                 (*num_items_returned)++;
2245                 *eof = 1;
2246                 return len;
2247         }
2248
2249         CT_DEBUG("qtaguid:proc stats page=%p *num_items_returned=%p off=%ld "
2250                 "char_count=%d *eof=%d\n", page, *num_items_returned,
2251                 items_to_skip, char_count, *eof);
2252
2253         if (*eof)
2254                 return 0;
2255
2256         /* The idx is there to help debug when things go belly up. */
2257         len = pp_stats_line(&ppi, 0);
2258         /* Don't advance the outp unless the whole line was printed */
2259         if (len >= ppi.char_count) {
2260                 *ppi.outp = '\0';
2261                 return ppi.outp - page;
2262         }
2263         if (len) {
2264                 ppi.outp += len;
2265                 ppi.char_count -= len;
2266                 (*num_items_returned)++;
2267         }
2268
2269         spin_lock_bh(&iface_stat_list_lock);
2270         list_for_each_entry(ppi.iface_entry, &iface_stat_list, list) {
2271                 struct rb_node *node;
2272                 spin_lock_bh(&ppi.iface_entry->tag_stat_list_lock);
2273                 for (node = rb_first(&ppi.iface_entry->tag_stat_tree);
2274                      node;
2275                      node = rb_next(node)) {
2276                         ppi.ts_entry = rb_entry(node, struct tag_stat, tn.node);
2277                         if (!pp_sets(&ppi)) {
2278                                 spin_unlock_bh(
2279                                         &ppi.iface_entry->tag_stat_list_lock);
2280                                 spin_unlock_bh(&iface_stat_list_lock);
2281                                 return ppi.outp - page;
2282                         }
2283                 }
2284                 spin_unlock_bh(&ppi.iface_entry->tag_stat_list_lock);
2285         }
2286         spin_unlock_bh(&iface_stat_list_lock);
2287
2288         *eof = 1;
2289         return ppi.outp - page;
2290 }
2291
2292 /*------------------------------------------*/
2293 static int qtudev_open(struct inode *inode, struct file *file)
2294 {
2295         struct uid_tag_data *utd_entry;
2296         struct proc_qtu_data  *pqd_entry;
2297         struct proc_qtu_data  *new_pqd_entry = 0;
2298         int res;
2299         bool utd_entry_found;
2300
2301         if (unlikely(qtu_proc_handling_passive))
2302                 return 0;
2303
2304         DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n",
2305                  current->pid, current->tgid, current_fsuid());
2306
2307         spin_lock_bh(&uid_tag_data_tree_lock);
2308
2309         /* Look for existing uid data, or alloc one. */
2310         utd_entry = get_uid_data(current_fsuid(), &utd_entry_found);
2311         if (IS_ERR_OR_NULL(utd_entry)) {
2312                 res = PTR_ERR(utd_entry);
2313                 goto err;
2314         }
2315
2316         /* Look for existing PID based proc_data */
2317         pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree,
2318                                               current->tgid);
2319         if (pqd_entry) {
2320                 pr_err("qtaguid: qtudev_open(): %u/%u %u "
2321                        "%s already opened\n",
2322                        current->pid, current->tgid, current_fsuid(),
2323                        QTU_DEV_NAME);
2324                 res = -EBUSY;
2325                 goto err_unlock_free_utd;
2326         }
2327
2328         new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC);
2329         if (!new_pqd_entry) {
2330                 pr_err("qtaguid: qtudev_open(): %u/%u %u: "
2331                        "proc data alloc failed\n",
2332                        current->pid, current->tgid, current_fsuid());
2333                 res = -ENOMEM;
2334                 goto err_unlock_free_utd;
2335         }
2336         new_pqd_entry->pid = current->tgid;
2337         INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list);
2338         new_pqd_entry->parent_tag_data = utd_entry;
2339
2340         proc_qtu_data_tree_insert(new_pqd_entry,
2341                                   &proc_qtu_data_tree);
2342
2343         spin_unlock_bh(&uid_tag_data_tree_lock);
2344         DR_DEBUG("qtaguid: tracking data for uid=%u\n", current_fsuid());
2345         file->private_data = new_pqd_entry;
2346         return 0;
2347
2348 err_unlock_free_utd:
2349         if (!utd_entry_found) {
2350                 rb_erase(&utd_entry->node, &uid_tag_data_tree);
2351                 kfree(utd_entry);
2352         }
2353         spin_unlock_bh(&uid_tag_data_tree_lock);
2354 err:
2355         return res;
2356 }
2357
2358 static int qtudev_release(struct inode *inode, struct file *file)
2359 {
2360         struct proc_qtu_data  *pqd_entry = file->private_data;
2361         struct uid_tag_data  *utd_entry = pqd_entry->parent_tag_data;
2362         struct sock_tag *st_entry;
2363         struct rb_root st_to_free_tree = RB_ROOT;
2364         struct list_head *entry, *next;
2365         struct tag_ref *tr;
2366
2367         if (unlikely(qtu_proc_handling_passive))
2368                 return 0;
2369
2370         /*
2371          * Do not trust the current->pid, it might just be a kworker cleaning
2372          * up after a dead proc.
2373          */
2374         DR_DEBUG("qtaguid: qtudev_release(): "
2375                  "pid=%u tgid=%u uid=%u "
2376                  "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n",
2377                  current->pid, current->tgid, pqd_entry->parent_tag_data->uid,
2378                  pqd_entry, pqd_entry->pid, utd_entry,
2379                  utd_entry->num_active_tags);
2380
2381         spin_lock_bh(&sock_tag_list_lock);
2382         spin_lock_bh(&uid_tag_data_tree_lock);
2383
2384         /*
2385          * If this proc didn't actually tag anything for itself, or has already
2386          * willingly cleaned up itself ...
2387          */
2388         put_utd_entry(utd_entry);
2389
2390         list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) {
2391                 st_entry = list_entry(entry, struct sock_tag, list);
2392                 DR_DEBUG("qtaguid: %s(): "
2393                          "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n",
2394                          __func__,
2395                          st_entry, st_entry->sk,
2396                          current->pid, current->tgid,
2397                          pqd_entry->parent_tag_data->uid);
2398
2399                 utd_entry = uid_tag_data_tree_search(
2400                         &uid_tag_data_tree,
2401                         get_uid_from_tag(st_entry->tag));
2402                 BUG_ON(IS_ERR_OR_NULL(utd_entry));
2403                 DR_DEBUG("qtaguid: %s(): "
2404                          "looking for tag=0x%llx in utd_entry=%p\n", __func__,
2405                          st_entry->tag, utd_entry);
2406                 tr = tag_ref_tree_search(&utd_entry->tag_ref_tree,
2407                                          st_entry->tag);
2408                 BUG_ON(!tr);
2409                 BUG_ON(tr->num_sock_tags <= 0);
2410                 tr->num_sock_tags--;
2411                 free_tag_ref_from_utd_entry(tr, utd_entry);
2412
2413                 rb_erase(&st_entry->sock_node, &sock_tag_tree);
2414                 list_del(&st_entry->list);
2415                 /* Can't sockfd_put() within spinlock, do it later. */
2416                 sock_tag_tree_insert(st_entry, &st_to_free_tree);
2417
2418                 /* Do not put_utd_entry(utd_entry) someone elses utd_entry */
2419         }
2420
2421         rb_erase(&pqd_entry->node, &proc_qtu_data_tree);
2422         kfree(pqd_entry);
2423         file->private_data = NULL;
2424
2425         spin_unlock_bh(&uid_tag_data_tree_lock);
2426         spin_unlock_bh(&sock_tag_list_lock);
2427
2428
2429         sock_tag_tree_erase(&st_to_free_tree);
2430
2431
2432         return 0;
2433 }
2434
2435 /*------------------------------------------*/
2436 static const struct file_operations qtudev_fops = {
2437         .owner = THIS_MODULE,
2438         .open = qtudev_open,
2439         .release = qtudev_release,
2440 };
2441
2442 static struct miscdevice qtu_device = {
2443         .minor = MISC_DYNAMIC_MINOR,
2444         .name = QTU_DEV_NAME,
2445         .fops = &qtudev_fops,
2446         /* How sad it doesn't allow for defaults: .mode = S_IRUGO | S_IWUSR */
2447 };
2448
2449 /*------------------------------------------*/
2450 static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir)
2451 {
2452         int ret;
2453         *res_procdir = proc_mkdir(module_procdirname, init_net.proc_net);
2454         if (!*res_procdir) {
2455                 pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n");
2456                 ret = -ENOMEM;
2457                 goto no_dir;
2458         }
2459
2460         xt_qtaguid_ctrl_file = create_proc_entry("ctrl", proc_ctrl_perms,
2461                                                 *res_procdir);
2462         if (!xt_qtaguid_ctrl_file) {
2463                 pr_err("qtaguid: failed to create xt_qtaguid/ctrl "
2464                         " file\n");
2465                 ret = -ENOMEM;
2466                 goto no_ctrl_entry;
2467         }
2468         xt_qtaguid_ctrl_file->read_proc = qtaguid_ctrl_proc_read;
2469         xt_qtaguid_ctrl_file->write_proc = qtaguid_ctrl_proc_write;
2470
2471         xt_qtaguid_stats_file = create_proc_entry("stats", proc_stats_perms,
2472                                                 *res_procdir);
2473         if (!xt_qtaguid_stats_file) {
2474                 pr_err("qtaguid: failed to create xt_qtaguid/stats "
2475                         "file\n");
2476                 ret = -ENOMEM;
2477                 goto no_stats_entry;
2478         }
2479         xt_qtaguid_stats_file->read_proc = qtaguid_stats_proc_read;
2480         /*
2481          * TODO: add support counter hacking
2482          * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write;
2483          */
2484         return 0;
2485
2486 no_stats_entry:
2487         remove_proc_entry("ctrl", *res_procdir);
2488 no_ctrl_entry:
2489         remove_proc_entry("xt_qtaguid", NULL);
2490 no_dir:
2491         return ret;
2492 }
2493
2494 static struct xt_match qtaguid_mt_reg __read_mostly = {
2495         /*
2496          * This module masquerades as the "owner" module so that iptables
2497          * tools can deal with it.
2498          */
2499         .name       = "owner",
2500         .revision   = 1,
2501         .family     = NFPROTO_UNSPEC,
2502         .match      = qtaguid_mt,
2503         .matchsize  = sizeof(struct xt_qtaguid_match_info),
2504         .me         = THIS_MODULE,
2505 };
2506
2507 static int __init qtaguid_mt_init(void)
2508 {
2509         if (qtaguid_proc_register(&xt_qtaguid_procdir)
2510             || iface_stat_init(xt_qtaguid_procdir)
2511             || xt_register_match(&qtaguid_mt_reg)
2512             || misc_register(&qtu_device))
2513                 return -1;
2514         return 0;
2515 }
2516
/*
 * TODO: allow unloading of the module.
 * For now stats are permanent.
 * Kconfig forces 'y/n' and never an 'm'.
 */
2522
2523 module_init(qtaguid_mt_init);
2524 MODULE_AUTHOR("jpa <jpa@google.com>");
2525 MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats");
2526 MODULE_LICENSE("GPL");
2527 MODULE_ALIAS("ipt_owner");
2528 MODULE_ALIAS("ip6t_owner");
2529 MODULE_ALIAS("ipt_qtaguid");
2530 MODULE_ALIAS("ip6t_qtaguid");