netfilter: xt_qtaguid: fix bad tcp_time_wait sock handling
[linux-3.10.git] / net / netfilter / xt_qtaguid.c
1 /*
2  * Kernel iptables module to track stats for packets based on user tags.
3  *
4  * (C) 2011 Google, Inc
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10
11 /*
12  * There are run-time debug flags enabled via the debug_mask module param, or
13  * via the DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h.
14  */
15 #define DEBUG
16
17 #include <linux/file.h>
18 #include <linux/inetdevice.h>
19 #include <linux/module.h>
20 #include <linux/netfilter/x_tables.h>
21 #include <linux/netfilter/xt_qtaguid.h>
22 #include <linux/ratelimit.h>
23 #include <linux/skbuff.h>
24 #include <linux/workqueue.h>
25 #include <net/addrconf.h>
26 #include <net/sock.h>
27 #include <net/tcp.h>
28 #include <net/udp.h>
29
30 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
31 #include <linux/netfilter_ipv6/ip6_tables.h>
32 #endif
33
34 #include <linux/netfilter/xt_socket.h>
35 #include "xt_qtaguid_internal.h"
36 #include "xt_qtaguid_print.h"
37
38 /*
39  * We only use the xt_socket funcs within a similar context to avoid unexpected
40  * return values.
41  */
42 #define XT_SOCKET_SUPPORTED_HOOKS \
43         ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN))
44
45
46 static const char *module_procdirname = "xt_qtaguid";
47 static struct proc_dir_entry *xt_qtaguid_procdir;
48
49 static unsigned int proc_iface_perms = S_IRUGO;
50 module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR);
51
52 static struct proc_dir_entry *xt_qtaguid_stats_file;
53 static unsigned int proc_stats_perms = S_IRUGO;
54 module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR);
55
56 static struct proc_dir_entry *xt_qtaguid_ctrl_file;
57
58 /* Everybody can write. But proc_ctrl_write_limited is true by default, which
59  * limits what can be controlled. See the can_*() functions.
60  */
61 static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO;
62 module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR);
63
64 /* Limited by default, so the gid of the ctrl and stats proc entries
65  * will limit what can be done. See the can_*() functions.
66  */
67 static bool proc_stats_readall_limited = true;
68 static bool proc_ctrl_write_limited = true;
69
70 module_param_named(stats_readall_limited, proc_stats_readall_limited, bool,
71                    S_IRUGO | S_IWUSR);
72 module_param_named(ctrl_write_limited, proc_ctrl_write_limited, bool,
73                    S_IRUGO | S_IWUSR);
74
75 /*
76  * Limit the number of active tags (via socket tags) for a given UID.
77  * Multiple processes could share the UID.
78  */
79 static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS;
80 module_param(max_sock_tags, int, S_IRUGO | S_IWUSR);
81
82 /*
83  * After the kernel has initialized this module, it is still possible
84  * to make it passive.
85  * Setting passive to Y:
86  *  - the iface stats handling will not act on notifications.
87  *  - iptables matches will never match.
88  *  - ctrl commands silently succeed.
89  *  - stats are always empty.
90  * This is mostly useful when a bug is suspected.
91  */
92 static bool module_passive;
93 module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR);
94
95 /*
96  * Control how qtaguid data is tracked per proc/uid.
97  * Setting tag_tracking_passive to Y:
98  *  - don't create proc specific structs to track tags
99  *  - don't check whether the active tags exceed the allowed limits.
100  *  - don't clean up socket tags on process exits.
101  * This is mostly useful when a bug is suspected.
102  */
103 static bool qtu_proc_handling_passive;
104 module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool,
105                    S_IRUGO | S_IWUSR);
106
107 #define QTU_DEV_NAME "xt_qtaguid"
108
109 uint qtaguid_debug_mask = DEFAULT_DEBUG_MASK;
110 module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR);
111
112 /*---------------------------------------------------------------------------*/
113 static const char *iface_stat_procdirname = "iface_stat";
114 static struct proc_dir_entry *iface_stat_procdir;
115 /*
116  * The iface_stat_all* will go away once userspace gets used to the new fields
117  * that have a format line.
118  */
119 static const char *iface_stat_all_procfilename = "iface_stat_all";
120 static struct proc_dir_entry *iface_stat_all_procfile;
121 static const char *iface_stat_fmt_procfilename = "iface_stat_fmt";
122 static struct proc_dir_entry *iface_stat_fmt_procfile;
123
124
125 /*
126  * Ordering of locks:
127  *  outer locks:
128  *    iface_stat_list_lock
129  *    sock_tag_list_lock
130  *  inner locks:
131  *    uid_tag_data_tree_lock
132  *    tag_counter_set_list_lock
133  * Notice how sock_tag_list_lock is held sometimes when uid_tag_data_tree_lock
134  * is acquired.
135  *
136  * Call tree with all lock holders as of 2012-04-27:
137  *
138  * iface_stat_fmt_proc_read()
139  *   iface_stat_list_lock
140  *     (struct iface_stat)
141  *
142  * qtaguid_ctrl_proc_read()
143  *   sock_tag_list_lock
144  *     (sock_tag_tree)
145  *     (struct proc_qtu_data->sock_tag_list)
146  *   prdebug_full_state()
147  *     sock_tag_list_lock
148  *       (sock_tag_tree)
149  *     uid_tag_data_tree_lock
150  *       (uid_tag_data_tree)
151  *       (proc_qtu_data_tree)
152  *     iface_stat_list_lock
153  *
154  * qtaguid_stats_proc_read()
155  *   iface_stat_list_lock
156  *     struct iface_stat->tag_stat_list_lock
157  *
158  * qtudev_open()
159  *   uid_tag_data_tree_lock
160  *
161  * qtudev_release()
162  *   sock_tag_list_lock
163  *     uid_tag_data_tree_lock
164  *   prdebug_full_state()
165  *     sock_tag_list_lock
166  *     uid_tag_data_tree_lock
167  *     iface_stat_list_lock
168  *
169  * iface_netdev_event_handler()
170  *   iface_stat_create()
171  *     iface_stat_list_lock
172  *   iface_stat_update()
173  *     iface_stat_list_lock
174  *
175  * iface_inetaddr_event_handler()
176  *   iface_stat_create()
177  *     iface_stat_list_lock
178  *   iface_stat_update()
179  *     iface_stat_list_lock
180  *
181  * iface_inet6addr_event_handler()
182  *   iface_stat_create_ipv6()
183  *     iface_stat_list_lock
184  *   iface_stat_update()
185  *     iface_stat_list_lock
186  *
187  * qtaguid_mt()
188  *   account_for_uid()
189  *     if_tag_stat_update()
190  *       get_sock_stat()
191  *         sock_tag_list_lock
192  *       struct iface_stat->tag_stat_list_lock
193  *         tag_stat_update()
194  *           get_active_counter_set()
195  *             tag_counter_set_list_lock
196  *         tag_stat_update()
197  *           get_active_counter_set()
198  *             tag_counter_set_list_lock
199  *
200  *
201  * qtaguid_ctrl_parse()
202  *   ctrl_cmd_delete()
203  *     sock_tag_list_lock
204  *     tag_counter_set_list_lock
205  *     iface_stat_list_lock
206  *       struct iface_stat->tag_stat_list_lock
207  *     uid_tag_data_tree_lock
208  *   ctrl_cmd_counter_set()
209  *     tag_counter_set_list_lock
210  *   ctrl_cmd_tag()
211  *     sock_tag_list_lock
212  *       (sock_tag_tree)
213  *       get_tag_ref()
214  *         uid_tag_data_tree_lock
215  *           (uid_tag_data_tree)
216  *       uid_tag_data_tree_lock
217  *         (proc_qtu_data_tree)
218  *   ctrl_cmd_untag()
219  *     sock_tag_list_lock
220  *     uid_tag_data_tree_lock
221  *
222  */
223 static LIST_HEAD(iface_stat_list);
224 static DEFINE_SPINLOCK(iface_stat_list_lock);
225
226 static struct rb_root sock_tag_tree = RB_ROOT;
227 static DEFINE_SPINLOCK(sock_tag_list_lock);
228
229 static struct rb_root tag_counter_set_tree = RB_ROOT;
230 static DEFINE_SPINLOCK(tag_counter_set_list_lock);
231
232 static struct rb_root uid_tag_data_tree = RB_ROOT;
233 static DEFINE_SPINLOCK(uid_tag_data_tree_lock);
234
235 static struct rb_root proc_qtu_data_tree = RB_ROOT;
236 /* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */
237
238 static struct qtaguid_event_counts qtu_events;
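/*
 * Hedged example, not part of the original module: a minimal sketch of the
 * lock ordering documented above, taking the outer sock_tag_list_lock before
 * the inner uid_tag_data_tree_lock (the order ctrl_cmd_tag() is shown using
 * in the call tree comment).
 */
static inline void example_nested_lock_order(void)
{
        spin_lock_bh(&sock_tag_list_lock);              /* outer lock first */
        spin_lock_bh(&uid_tag_data_tree_lock);          /* then inner lock */
        /* ... look up/update sock_tag_tree and uid_tag_data_tree here ... */
        spin_unlock_bh(&uid_tag_data_tree_lock);        /* release in reverse order */
        spin_unlock_bh(&sock_tag_list_lock);
}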
239 /*----------------------------------------------*/
240 static bool can_manipulate_uids(void)
241 {
242         /* root pwnd */
243         return in_egroup_p(xt_qtaguid_ctrl_file->gid)
244                 || unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_limited)
245                 || unlikely(current_fsuid() == xt_qtaguid_ctrl_file->uid);
246 }
247
248 static bool can_impersonate_uid(uid_t uid)
249 {
250         return uid == current_fsuid() || can_manipulate_uids();
251 }
252
253 static bool can_read_other_uid_stats(uid_t uid)
254 {
255         /* root pwnd */
256         return in_egroup_p(xt_qtaguid_stats_file->gid)
257                 || unlikely(!current_fsuid()) || uid == current_fsuid()
258                 || unlikely(!proc_stats_readall_limited)
259                 || unlikely(current_fsuid() == xt_qtaguid_ctrl_file->uid);
260 }
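/*
 * Hedged caller sketch, illustrative only: a request that tags a socket on
 * behalf of another uid is assumed to be rejected with -EPERM unless the
 * caller passes can_impersonate_uid(), which is how the ctrl "tag" command
 * uses these checks. example_check_tag_permission() is not a real helper.
 */
static inline int example_check_tag_permission(uid_t target_uid)
{
        if (!can_impersonate_uid(target_uid))
                return -EPERM;
        return 0;
}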
261
262 static inline void dc_add_byte_packets(struct data_counters *counters, int set,
263                                   enum ifs_tx_rx direction,
264                                   enum ifs_proto ifs_proto,
265                                   int bytes,
266                                   int packets)
267 {
268         counters->bpc[set][direction][ifs_proto].bytes += bytes;
269         counters->bpc[set][direction][ifs_proto].packets += packets;
270 }
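/*
 * Hedged sketch, illustrative only: dc_sum_bytes()/dc_sum_packets() used by
 * pp_iface_stat_line() below are defined in xt_qtaguid_internal.h (not shown
 * here); a per-direction total is assumed to simply sum the TCP, UDP and
 * "other" buckets filled in by dc_add_byte_packets().
 */
static inline uint64_t example_dc_sum_bytes(struct data_counters *counters,
                                            int set, enum ifs_tx_rx direction)
{
        return counters->bpc[set][direction][IFS_TCP].bytes
                + counters->bpc[set][direction][IFS_UDP].bytes
                + counters->bpc[set][direction][IFS_PROTO_OTHER].bytes;
}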
271
272 static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag)
273 {
274         struct rb_node *node = root->rb_node;
275
276         while (node) {
277                 struct tag_node *data = rb_entry(node, struct tag_node, node);
278                 int result;
279                 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
280                          " node=%p data=%p\n", tag, node, data);
281                 result = tag_compare(tag, data->tag);
282                 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
283                          " data.tag=0x%llx (uid=%u) res=%d\n",
284                          tag, data->tag, get_uid_from_tag(data->tag), result);
285                 if (result < 0)
286                         node = node->rb_left;
287                 else if (result > 0)
288                         node = node->rb_right;
289                 else
290                         return data;
291         }
292         return NULL;
293 }
294
295 static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root)
296 {
297         struct rb_node **new = &(root->rb_node), *parent = NULL;
298
299         /* Figure out where to put new node */
300         while (*new) {
301                 struct tag_node *this = rb_entry(*new, struct tag_node,
302                                                  node);
303                 int result = tag_compare(data->tag, this->tag);
304                 RB_DEBUG("qtaguid: %s(): tag=0x%llx"
305                          " (uid=%u)\n", __func__,
306                          this->tag,
307                          get_uid_from_tag(this->tag));
308                 parent = *new;
309                 if (result < 0)
310                         new = &((*new)->rb_left);
311                 else if (result > 0)
312                         new = &((*new)->rb_right);
313                 else
314                         BUG();
315         }
316
317         /* Add new node and rebalance tree. */
318         rb_link_node(&data->node, parent, new);
319         rb_insert_color(&data->node, root);
320 }
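/*
 * Hedged usage sketch, illustrative only: the typed wrappers below all follow
 * the same find-or-insert pattern on an rb_root keyed by tag_t. The caller is
 * assumed to hold whichever lock protects @root; example_tag_node_get() is
 * not a real helper in this file.
 */
static inline struct tag_node *example_tag_node_get(struct rb_root *root,
                                                    tag_t tag,
                                                    struct tag_node *new_node)
{
        struct tag_node *found = tag_node_tree_search(root, tag);

        if (found)
                return found;
        new_node->tag = tag;
        tag_node_tree_insert(new_node, root);
        return new_node;
}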
321
322 static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root)
323 {
324         tag_node_tree_insert(&data->tn, root);
325 }
326
327 static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag)
328 {
329         struct tag_node *node = tag_node_tree_search(root, tag);
330         if (!node)
331                 return NULL;
332         return rb_entry(&node->node, struct tag_stat, tn.node);
333 }
334
335 static void tag_counter_set_tree_insert(struct tag_counter_set *data,
336                                         struct rb_root *root)
337 {
338         tag_node_tree_insert(&data->tn, root);
339 }
340
341 static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root,
342                                                            tag_t tag)
343 {
344         struct tag_node *node = tag_node_tree_search(root, tag);
345         if (!node)
346                 return NULL;
347         return rb_entry(&node->node, struct tag_counter_set, tn.node);
348
349 }
350
351 static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root)
352 {
353         tag_node_tree_insert(&data->tn, root);
354 }
355
356 static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag)
357 {
358         struct tag_node *node = tag_node_tree_search(root, tag);
359         if (!node)
360                 return NULL;
361         return rb_entry(&node->node, struct tag_ref, tn.node);
362 }
363
364 static struct sock_tag *sock_tag_tree_search(struct rb_root *root,
365                                              const struct sock *sk)
366 {
367         struct rb_node *node = root->rb_node;
368
369         while (node) {
370                 struct sock_tag *data = rb_entry(node, struct sock_tag,
371                                                  sock_node);
372                 if (sk < data->sk)
373                         node = node->rb_left;
374                 else if (sk > data->sk)
375                         node = node->rb_right;
376                 else
377                         return data;
378         }
379         return NULL;
380 }
381
382 static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root)
383 {
384         struct rb_node **new = &(root->rb_node), *parent = NULL;
385
386         /* Figure out where to put new node */
387         while (*new) {
388                 struct sock_tag *this = rb_entry(*new, struct sock_tag,
389                                                  sock_node);
390                 parent = *new;
391                 if (data->sk < this->sk)
392                         new = &((*new)->rb_left);
393                 else if (data->sk > this->sk)
394                         new = &((*new)->rb_right);
395                 else
396                         BUG();
397         }
398
399         /* Add new node and rebalance tree. */
400         rb_link_node(&data->sock_node, parent, new);
401         rb_insert_color(&data->sock_node, root);
402 }
403
404 static void sock_tag_tree_erase(struct rb_root *st_to_free_tree)
405 {
406         struct rb_node *node;
407         struct sock_tag *st_entry;
408
409         node = rb_first(st_to_free_tree);
410         while (node) {
411                 st_entry = rb_entry(node, struct sock_tag, sock_node);
412                 node = rb_next(node);
413                 CT_DEBUG("qtaguid: %s(): "
414                          "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__,
415                          st_entry->sk,
416                          st_entry->tag,
417                          get_uid_from_tag(st_entry->tag));
418                 rb_erase(&st_entry->sock_node, st_to_free_tree);
419                 sockfd_put(st_entry->socket);
420                 kfree(st_entry);
421         }
422 }
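/*
 * Hedged usage sketch, illustrative only: callers such as the ctrl "delete"
 * command and qtudev_release() are assumed to move matching entries into a
 * private tree while holding sock_tag_list_lock, then hand that tree to
 * sock_tag_tree_erase() after dropping the lock so the lock is held only
 * briefly. example_flush_all_sock_tags() is not a real helper in this file.
 */
static inline void example_flush_all_sock_tags(void)
{
        struct rb_root st_to_free_tree = RB_ROOT;
        struct rb_node *node;
        struct sock_tag *st_entry;

        spin_lock_bh(&sock_tag_list_lock);
        while ((node = rb_first(&sock_tag_tree))) {
                st_entry = rb_entry(node, struct sock_tag, sock_node);
                rb_erase(&st_entry->sock_node, &sock_tag_tree);
                sock_tag_tree_insert(st_entry, &st_to_free_tree);
        }
        spin_unlock_bh(&sock_tag_list_lock);
        sock_tag_tree_erase(&st_to_free_tree);  /* puts sockets, frees entries */
}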
423
424 static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root,
425                                                        const pid_t pid)
426 {
427         struct rb_node *node = root->rb_node;
428
429         while (node) {
430                 struct proc_qtu_data *data = rb_entry(node,
431                                                       struct proc_qtu_data,
432                                                       node);
433                 if (pid < data->pid)
434                         node = node->rb_left;
435                 else if (pid > data->pid)
436                         node = node->rb_right;
437                 else
438                         return data;
439         }
440         return NULL;
441 }
442
443 static void proc_qtu_data_tree_insert(struct proc_qtu_data *data,
444                                       struct rb_root *root)
445 {
446         struct rb_node **new = &(root->rb_node), *parent = NULL;
447
448         /* Figure out where to put new node */
449         while (*new) {
450                 struct proc_qtu_data *this = rb_entry(*new,
451                                                       struct proc_qtu_data,
452                                                       node);
453                 parent = *new;
454                 if (data->pid < this->pid)
455                         new = &((*new)->rb_left);
456                 else if (data->pid > this->pid)
457                         new = &((*new)->rb_right);
458                 else
459                         BUG();
460         }
461
462         /* Add new node and rebalance tree. */
463         rb_link_node(&data->node, parent, new);
464         rb_insert_color(&data->node, root);
465 }
466
467 static void uid_tag_data_tree_insert(struct uid_tag_data *data,
468                                      struct rb_root *root)
469 {
470         struct rb_node **new = &(root->rb_node), *parent = NULL;
471
472         /* Figure out where to put new node */
473         while (*new) {
474                 struct uid_tag_data *this = rb_entry(*new,
475                                                      struct uid_tag_data,
476                                                      node);
477                 parent = *new;
478                 if (data->uid < this->uid)
479                         new = &((*new)->rb_left);
480                 else if (data->uid > this->uid)
481                         new = &((*new)->rb_right);
482                 else
483                         BUG();
484         }
485
486         /* Add new node and rebalance tree. */
487         rb_link_node(&data->node, parent, new);
488         rb_insert_color(&data->node, root);
489 }
490
491 static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root,
492                                                      uid_t uid)
493 {
494         struct rb_node *node = root->rb_node;
495
496         while (node) {
497                 struct uid_tag_data *data = rb_entry(node,
498                                                      struct uid_tag_data,
499                                                      node);
500                 if (uid < data->uid)
501                         node = node->rb_left;
502                 else if (uid > data->uid)
503                         node = node->rb_right;
504                 else
505                         return data;
506         }
507         return NULL;
508 }
509
510 /*
511  * Allocates a new uid_tag_data struct if needed.
512  * Returns a pointer to the found or allocated uid_tag_data.
513  * Returns a PTR_ERR on failure. Caller must hold uid_tag_data_tree_lock.
514  * If found_res is not NULL:
515  *   sets *found_res to true if an existing entry was found (not allocated).
516  *   sets *found_res to false if a new entry was allocated.
517  */
518 struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res)
519 {
520         struct uid_tag_data *utd_entry;
521
522         /* Look for top level uid_tag_data for the UID */
523         utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid);
524         DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry);
525
526         if (found_res)
527                 *found_res = utd_entry;
528         if (utd_entry)
529                 return utd_entry;
530
531         utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC);
532         if (!utd_entry) {
533                 pr_err("qtaguid: get_uid_data(%u): "
534                        "tag data alloc failed\n", uid);
535                 return ERR_PTR(-ENOMEM);
536         }
537
538         utd_entry->uid = uid;
539         utd_entry->tag_ref_tree = RB_ROOT;
540         uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree);
541         DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry);
542         return utd_entry;
543 }
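/*
 * Hedged caller sketch, illustrative only: get_uid_data() is called with
 * uid_tag_data_tree_lock held (as get_tag_ref() and qtudev_open() do), and
 * the *found_res flag distinguishes reuse of an existing entry from a fresh
 * allocation. example_get_uid_data_locked() is not a real helper.
 */
static inline struct uid_tag_data *example_get_uid_data_locked(uid_t uid)
{
        struct uid_tag_data *utd_entry;
        bool found;

        spin_lock_bh(&uid_tag_data_tree_lock);
        utd_entry = get_uid_data(uid, &found);
        spin_unlock_bh(&uid_tag_data_tree_lock);
        if (IS_ERR(utd_entry))
                return utd_entry;       /* -ENOMEM propagated as a PTR_ERR */
        /* found == true: the entry already existed; false: just allocated */
        return utd_entry;
}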
544
545 /* Never returns NULL. Either PTR_ERR or a valid ptr. */
546 static struct tag_ref *new_tag_ref(tag_t new_tag,
547                                    struct uid_tag_data *utd_entry)
548 {
549         struct tag_ref *tr_entry;
550         int res;
551
552         if (utd_entry->num_active_tags + 1 > max_sock_tags) {
553                 pr_info("qtaguid: new_tag_ref(0x%llx): "
554                         "tag ref alloc quota exceeded. max=%d\n",
555                         new_tag, max_sock_tags);
556                 res = -EMFILE;
557                 goto err_res;
558
559         }
560
561         tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC);
562         if (!tr_entry) {
563                 pr_err("qtaguid: new_tag_ref(0x%llx): "
564                        "tag ref alloc failed\n",
565                        new_tag);
566                 res = -ENOMEM;
567                 goto err_res;
568         }
569         tr_entry->tn.tag = new_tag;
570         /* tr_entry->num_sock_tags  handled by caller */
571         utd_entry->num_active_tags++;
572         tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree);
573         DR_DEBUG("qtaguid: new_tag_ref(0x%llx): "
574                  " inserted new tag ref %p\n",
575                  new_tag, tr_entry);
576         return tr_entry;
577
578 err_res:
579         return ERR_PTR(res);
580 }
581
582 static struct tag_ref *lookup_tag_ref(tag_t full_tag,
583                                       struct uid_tag_data **utd_res)
584 {
585         struct uid_tag_data *utd_entry;
586         struct tag_ref *tr_entry;
587         bool found_utd;
588         uid_t uid = get_uid_from_tag(full_tag);
589
590         DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n",
591                  full_tag, uid);
592
593         utd_entry = get_uid_data(uid, &found_utd);
594         if (IS_ERR_OR_NULL(utd_entry)) {
595                 if (utd_res)
596                         *utd_res = utd_entry;
597                 return NULL;
598         }
599
600         tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag);
601         if (utd_res)
602                 *utd_res = utd_entry;
603         DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n",
604                  full_tag, utd_entry, tr_entry);
605         return tr_entry;
606 }
607
608 /* Never returns NULL. Either PTR_ERR or a valid ptr. */
609 static struct tag_ref *get_tag_ref(tag_t full_tag,
610                                    struct uid_tag_data **utd_res)
611 {
612         struct uid_tag_data *utd_entry;
613         struct tag_ref *tr_entry;
614
615         DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n",
616                  full_tag);
617         spin_lock_bh(&uid_tag_data_tree_lock);
618         tr_entry = lookup_tag_ref(full_tag, &utd_entry);
619         BUG_ON(IS_ERR_OR_NULL(utd_entry));
620         if (!tr_entry)
621                 tr_entry = new_tag_ref(full_tag, utd_entry);
622
623         spin_unlock_bh(&uid_tag_data_tree_lock);
624         if (utd_res)
625                 *utd_res = utd_entry;
626         DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n",
627                  full_tag, utd_entry, tr_entry);
628         return tr_entry;
629 }
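/*
 * Hedged caller sketch, illustrative only: get_tag_ref() never returns NULL,
 * so a caller only needs IS_ERR() handling; bumping num_sock_tags is left to
 * the caller (the ctrl "tag" command does it under sock_tag_list_lock).
 * example_take_tag_ref() is not a real helper in this file.
 */
static inline int example_take_tag_ref(tag_t full_tag)
{
        struct tag_ref *tr_entry = get_tag_ref(full_tag, NULL);

        if (IS_ERR(tr_entry))
                return PTR_ERR(tr_entry);       /* -EMFILE or -ENOMEM */
        /* real callers do this increment under sock_tag_list_lock */
        tr_entry->num_sock_tags++;
        return 0;
}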
630
631 /* Checks and maybe frees the UID Tag Data entry */
632 static void put_utd_entry(struct uid_tag_data *utd_entry)
633 {
634         /* Are we done with the UID tag data entry? */
635         if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree) &&
636                 !utd_entry->num_pqd) {
637                 DR_DEBUG("qtaguid: %s(): "
638                          "erase utd_entry=%p uid=%u "
639                          "by pid=%u tgid=%u uid=%u\n", __func__,
640                          utd_entry, utd_entry->uid,
641                          current->pid, current->tgid, current_fsuid());
642                 BUG_ON(utd_entry->num_active_tags);
643                 rb_erase(&utd_entry->node, &uid_tag_data_tree);
644                 kfree(utd_entry);
645         } else {
646                 DR_DEBUG("qtaguid: %s(): "
647                          "utd_entry=%p still has %d tags %d proc_qtu_data\n",
648                          __func__, utd_entry, utd_entry->num_active_tags,
649                          utd_entry->num_pqd);
650                 BUG_ON(!(utd_entry->num_active_tags ||
651                          utd_entry->num_pqd));
652         }
653 }
654
655 /*
656  * If no sock_tags are using this tag_ref,
657  * decrements refcount of utd_entry, removes tr_entry
658  * from utd_entry->tag_ref_tree and frees.
659  */
660 static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry,
661                                         struct uid_tag_data *utd_entry)
662 {
663         DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__,
664                  tr_entry, tr_entry->tn.tag,
665                  get_uid_from_tag(tr_entry->tn.tag));
666         if (!tr_entry->num_sock_tags) {
667                 BUG_ON(!utd_entry->num_active_tags);
668                 utd_entry->num_active_tags--;
669                 rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree);
670                 DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry);
671                 kfree(tr_entry);
672         }
673 }
674
675 static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry)
676 {
677         struct rb_node *node;
678         struct tag_ref *tr_entry;
679         tag_t acct_tag;
680
681         DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__,
682                  full_tag, get_uid_from_tag(full_tag));
683         acct_tag = get_atag_from_tag(full_tag);
684         node = rb_first(&utd_entry->tag_ref_tree);
685         while (node) {
686                 tr_entry = rb_entry(node, struct tag_ref, tn.node);
687                 node = rb_next(node);
688                 if (!acct_tag || tr_entry->tn.tag == full_tag)
689                         free_tag_ref_from_utd_entry(tr_entry, utd_entry);
690         }
691 }
692
693 static int read_proc_u64(char *page, char **start, off_t off,
694                         int count, int *eof, void *data)
695 {
696         int len;
697         uint64_t value;
698         char *p = page;
699         uint64_t *iface_entry = data;
700
701         if (!data)
702                 return 0;
703
704         value = *iface_entry;
705         p += sprintf(p, "%llu\n", value);
706         len = (p - page) - off;
707         *eof = (len <= count) ? 1 : 0;
708         *start = page + off;
709         return len;
710 }
711
712 static int read_proc_bool(char *page, char **start, off_t off,
713                         int count, int *eof, void *data)
714 {
715         int len;
716         bool value;
717         char *p = page;
718         bool *bool_entry = data;
719
720         if (!data)
721                 return 0;
722
723         value = *bool_entry;
724         p += sprintf(p, "%u\n", value);
725         len = (p - page) - off;
726         *eof = (len <= count) ? 1 : 0;
727         *start = page + off;
728         return len;
729 }
730
731 static int get_active_counter_set(tag_t tag)
732 {
733         int active_set = 0;
734         struct tag_counter_set *tcs;
735
736         MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)"
737                  " (uid=%u)\n",
738                  tag, get_uid_from_tag(tag));
739         /* For now we only handle UID tags for active sets */
740         tag = get_utag_from_tag(tag);
741         spin_lock_bh(&tag_counter_set_list_lock);
742         tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
743         if (tcs)
744                 active_set = tcs->active_set;
745         spin_unlock_bh(&tag_counter_set_list_lock);
746         return active_set;
747 }
748
749 /*
750  * Find the entry for tracking the specified interface.
751  * Caller must hold iface_stat_list_lock
752  */
753 static struct iface_stat *get_iface_entry(const char *ifname)
754 {
755         struct iface_stat *iface_entry;
756
757         /* Find the entry for tracking the specified interface */
758         if (ifname == NULL) {
759                 pr_info("qtaguid: iface_stat: get() NULL device name\n");
760                 return NULL;
761         }
762
763         /* Iterate over interfaces */
764         list_for_each_entry(iface_entry, &iface_stat_list, list) {
765                 if (!strcmp(ifname, iface_entry->ifname))
766                         goto done;
767         }
768         iface_entry = NULL;
769 done:
770         return iface_entry;
771 }
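/*
 * Hedged caller sketch, illustrative only: per the comment above,
 * get_iface_entry() walks iface_stat_list without taking the lock itself, so
 * a caller grabs iface_stat_list_lock around it and only uses the entry while
 * the lock is held. example_iface_is_active() is not a real helper.
 */
static inline bool example_iface_is_active(const char *ifname)
{
        struct iface_stat *entry;
        bool active = false;

        spin_lock_bh(&iface_stat_list_lock);
        entry = get_iface_entry(ifname);
        if (entry)
                active = entry->active;
        spin_unlock_bh(&iface_stat_list_lock);
        return active;
}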
772
773 /* This is for fmt2 only */
774 static int pp_iface_stat_line(bool header, char *outp,
775                               int char_count, struct iface_stat *iface_entry)
776 {
777         int len;
778         if (header) {
779                 len = snprintf(outp, char_count,
780                                "ifname "
781                                "total_skb_rx_bytes total_skb_rx_packets "
782                                "total_skb_tx_bytes total_skb_tx_packets "
783                                "rx_tcp_bytes rx_tcp_packets "
784                                "rx_udp_bytes rx_udp_packets "
785                                "rx_other_bytes rx_other_packets "
786                                "tx_tcp_bytes tx_tcp_packets "
787                                "tx_udp_bytes tx_udp_packets "
788                                "tx_other_bytes tx_other_packets\n"
789                         );
790         } else {
791                 struct data_counters *cnts;
792                 int cnt_set = 0;   /* We only use one set for the device */
793                 cnts = &iface_entry->totals_via_skb;
794                 len = snprintf(
795                         outp, char_count,
796                         "%s "
797                         "%llu %llu %llu %llu %llu %llu %llu %llu "
798                         "%llu %llu %llu %llu %llu %llu %llu %llu\n",
799                         iface_entry->ifname,
800                         dc_sum_bytes(cnts, cnt_set, IFS_RX),
801                         dc_sum_packets(cnts, cnt_set, IFS_RX),
802                         dc_sum_bytes(cnts, cnt_set, IFS_TX),
803                         dc_sum_packets(cnts, cnt_set, IFS_TX),
804                         cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
805                         cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
806                         cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
807                         cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
808                         cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
809                         cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
810                         cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
811                         cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
812                         cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
813                         cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
814                         cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
815                         cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
816         }
817         return len;
818 }
819
820 static int iface_stat_fmt_proc_read(char *page, char **num_items_returned,
821                                     off_t items_to_skip, int char_count,
822                                     int *eof, void *data)
823 {
824         char *outp = page;
825         int item_index = 0;
826         int len;
827         int fmt = (int)data; /* The data is just 1 (old) or 2 (uses fmt) */
828         struct iface_stat *iface_entry;
829         struct rtnl_link_stats64 dev_stats, *stats;
830         struct rtnl_link_stats64 no_dev_stats = {0};
831
832         if (unlikely(module_passive)) {
833                 *eof = 1;
834                 return 0;
835         }
836
837         CT_DEBUG("qtaguid:proc iface_stat_fmt "
838                  "pid=%u tgid=%u uid=%u "
839                  "page=%p *num_items_returned=%p off=%ld "
840                  "char_count=%d *eof=%d\n",
841                  current->pid, current->tgid, current_fsuid(),
842                  page, *num_items_returned,
843                  items_to_skip, char_count, *eof);
844
845         if (*eof)
846                 return 0;
847
848         if (fmt == 2 && item_index++ >= items_to_skip) {
849                 len = pp_iface_stat_line(true, outp, char_count, NULL);
850                 if (len >= char_count) {
851                         *outp = '\0';
852                         return outp - page;
853                 }
854                 outp += len;
855                 char_count -= len;
856                 (*num_items_returned)++;
857         }
858
859         /*
860          * This lock will prevent iface_stat_update() from changing active,
861          * and in turn prevent an interface from unregistering itself.
862          */
863         spin_lock_bh(&iface_stat_list_lock);
864         list_for_each_entry(iface_entry, &iface_stat_list, list) {
865                 if (item_index++ < items_to_skip)
866                         continue;
867
868                 if (iface_entry->active) {
869                         stats = dev_get_stats(iface_entry->net_dev,
870                                               &dev_stats);
871                 } else {
872                         stats = &no_dev_stats;
873                 }
874                 /*
875                  * If the meaning of the data changes, then update the fmtX
876                  * string.
877                  */
878                 if (fmt == 1) {
879                         len = snprintf(
880                                 outp, char_count,
881                                 "%s %d "
882                                 "%llu %llu %llu %llu "
883                                 "%llu %llu %llu %llu\n",
884                                 iface_entry->ifname,
885                                 iface_entry->active,
886                                 iface_entry->totals_via_dev[IFS_RX].bytes,
887                                 iface_entry->totals_via_dev[IFS_RX].packets,
888                                 iface_entry->totals_via_dev[IFS_TX].bytes,
889                                 iface_entry->totals_via_dev[IFS_TX].packets,
890                                 stats->rx_bytes, stats->rx_packets,
891                                 stats->tx_bytes, stats->tx_packets
892                                 );
893                 } else {
894                         len = pp_iface_stat_line(false, outp, char_count,
895                                                  iface_entry);
896                 }
897                 if (len >= char_count) {
898                         spin_unlock_bh(&iface_stat_list_lock);
899                         *outp = '\0';
900                         return outp - page;
901                 }
902                 outp += len;
903                 char_count -= len;
904                 (*num_items_returned)++;
905         }
906         spin_unlock_bh(&iface_stat_list_lock);
907
908         *eof = 1;
909         return outp - page;
910 }
911
912 static void iface_create_proc_worker(struct work_struct *work)
913 {
914         struct proc_dir_entry *proc_entry;
915         struct iface_stat_work *isw = container_of(work, struct iface_stat_work,
916                                                    iface_work);
917         struct iface_stat *new_iface  = isw->iface_entry;
918
919         /* iface_entries are not deleted, so safe to manipulate. */
920         proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir);
921         if (IS_ERR_OR_NULL(proc_entry)) {
922                 pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n");
923                 kfree(isw);
924                 return;
925         }
926
927         new_iface->proc_ptr = proc_entry;
928
929         create_proc_read_entry("tx_bytes", proc_iface_perms, proc_entry,
930                                read_proc_u64,
931                                &new_iface->totals_via_dev[IFS_TX].bytes);
932         create_proc_read_entry("rx_bytes", proc_iface_perms, proc_entry,
933                                read_proc_u64,
934                                &new_iface->totals_via_dev[IFS_RX].bytes);
935         create_proc_read_entry("tx_packets", proc_iface_perms, proc_entry,
936                                read_proc_u64,
937                                &new_iface->totals_via_dev[IFS_TX].packets);
938         create_proc_read_entry("rx_packets", proc_iface_perms, proc_entry,
939                                read_proc_u64,
940                                &new_iface->totals_via_dev[IFS_RX].packets);
941         create_proc_read_entry("active", proc_iface_perms, proc_entry,
942                         read_proc_bool, &new_iface->active);
943
944         IF_DEBUG("qtaguid: iface_stat: create_proc(): done "
945                  "entry=%p dev=%s\n", new_iface, new_iface->ifname);
946         kfree(isw);
947 }
948
949 /*
950  * Will set the entry's active state, and
951  * update the entry's net_dev pointer accordingly.
952  */
953 static void _iface_stat_set_active(struct iface_stat *entry,
954                                    struct net_device *net_dev,
955                                    bool activate)
956 {
957         if (activate) {
958                 entry->net_dev = net_dev;
959                 entry->active = true;
960                 IF_DEBUG("qtaguid: %s(%s): "
961                          "enable tracking. rfcnt=%d\n", __func__,
962                          entry->ifname,
963                          __this_cpu_read(*net_dev->pcpu_refcnt));
964         } else {
965                 entry->active = false;
966                 entry->net_dev = NULL;
967                 IF_DEBUG("qtaguid: %s(%s): "
968                          "disable tracking. rfcnt=%d\n", __func__,
969                          entry->ifname,
970                          __this_cpu_read(*net_dev->pcpu_refcnt));
971
972         }
973 }
974
975 /* Caller must hold iface_stat_list_lock */
976 static struct iface_stat *iface_alloc(struct net_device *net_dev)
977 {
978         struct iface_stat *new_iface;
979         struct iface_stat_work *isw;
980
981         new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC);
982         if (new_iface == NULL) {
983                 pr_err("qtaguid: iface_stat: create(%s): "
984                        "iface_stat alloc failed\n", net_dev->name);
985                 return NULL;
986         }
987         new_iface->ifname = kstrdup(net_dev->name, GFP_ATOMIC);
988         if (new_iface->ifname == NULL) {
989                 pr_err("qtaguid: iface_stat: create(%s): "
990                        "ifname alloc failed\n", net_dev->name);
991                 kfree(new_iface);
992                 return NULL;
993         }
994         spin_lock_init(&new_iface->tag_stat_list_lock);
995         new_iface->tag_stat_tree = RB_ROOT;
996         _iface_stat_set_active(new_iface, net_dev, true);
997
998         /*
999          * ipv6 notifier chains are atomic :(
1000          * No create_proc_read_entry() for you!
1001          */
1002         isw = kmalloc(sizeof(*isw), GFP_ATOMIC);
1003         if (!isw) {
1004                 pr_err("qtaguid: iface_stat: create(%s): "
1005                        "work alloc failed\n", new_iface->ifname);
1006                 _iface_stat_set_active(new_iface, net_dev, false);
1007                 kfree(new_iface->ifname);
1008                 kfree(new_iface);
1009                 return NULL;
1010         }
1011         isw->iface_entry = new_iface;
1012         INIT_WORK(&isw->iface_work, iface_create_proc_worker);
1013         schedule_work(&isw->iface_work);
1014         list_add(&new_iface->list, &iface_stat_list);
1015         return new_iface;
1016 }
1017
1018 static void iface_check_stats_reset_and_adjust(struct net_device *net_dev,
1019                                                struct iface_stat *iface)
1020 {
1021         struct rtnl_link_stats64 dev_stats, *stats;
1022         bool stats_rewound;
1023
1024         stats = dev_get_stats(net_dev, &dev_stats);
1025         /* No empty packets */
1026         stats_rewound =
1027                 (stats->rx_bytes < iface->last_known[IFS_RX].bytes)
1028                 || (stats->tx_bytes < iface->last_known[IFS_TX].bytes);
1029
1030         IF_DEBUG("qtaguid: %s(%s): iface=%p netdev=%p "
1031                  "bytes rx/tx=%llu/%llu "
1032                  "active=%d last_known=%d "
1033                  "stats_rewound=%d\n", __func__,
1034                  net_dev ? net_dev->name : "?",
1035                  iface, net_dev,
1036                  stats->rx_bytes, stats->tx_bytes,
1037                  iface->active, iface->last_known_valid, stats_rewound);
1038
1039         if (iface->active && iface->last_known_valid && stats_rewound) {
1040                 pr_warn_once("qtaguid: iface_stat: %s(%s): "
1041                              "iface reset its stats unexpectedly\n", __func__,
1042                              net_dev->name);
1043
1044                 iface->totals_via_dev[IFS_TX].bytes +=
1045                         iface->last_known[IFS_TX].bytes;
1046                 iface->totals_via_dev[IFS_TX].packets +=
1047                         iface->last_known[IFS_TX].packets;
1048                 iface->totals_via_dev[IFS_RX].bytes +=
1049                         iface->last_known[IFS_RX].bytes;
1050                 iface->totals_via_dev[IFS_RX].packets +=
1051                         iface->last_known[IFS_RX].packets;
1052                 iface->last_known_valid = false;
1053                 IF_DEBUG("qtaguid: %s(%s): iface=%p "
1054                          "used last known bytes rx/tx=%llu/%llu\n", __func__,
1055                          iface->ifname, iface, iface->last_known[IFS_RX].bytes,
1056                          iface->last_known[IFS_TX].bytes);
1057         }
1058 }
1059
1060 /*
1061  * Create a new entry for tracking the specified interface.
1062  * Do nothing if the entry already exists.
1063  * Called when an interface is configured with a valid IP address.
1064  */
1065 static void iface_stat_create(struct net_device *net_dev,
1066                               struct in_ifaddr *ifa)
1067 {
1068         struct in_device *in_dev = NULL;
1069         const char *ifname;
1070         struct iface_stat *entry;
1071         __be32 ipaddr = 0;
1072         struct iface_stat *new_iface;
1073
1074         IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n",
1075                  net_dev ? net_dev->name : "?",
1076                  ifa, net_dev);
1077         if (!net_dev) {
1078                 pr_err("qtaguid: iface_stat: create(): no net dev\n");
1079                 return;
1080         }
1081
1082         ifname = net_dev->name;
1083         if (!ifa) {
1084                 in_dev = in_dev_get(net_dev);
1085                 if (!in_dev) {
1086                         pr_err("qtaguid: iface_stat: create(%s): no inet dev\n",
1087                                ifname);
1088                         return;
1089                 }
1090                 IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n",
1091                          ifname, in_dev);
1092                 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1093                         IF_DEBUG("qtaguid: iface_stat: create(%s): "
1094                                  "ifa=%p ifa_label=%s\n",
1095                                  ifname, ifa,
1096                                  ifa->ifa_label ? ifa->ifa_label : "(null)");
1097                         if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label))
1098                                 break;
1099                 }
1100         }
1101
1102         if (!ifa) {
1103                 IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n",
1104                          ifname);
1105                 goto done_put;
1106         }
1107         ipaddr = ifa->ifa_local;
1108
1109         spin_lock_bh(&iface_stat_list_lock);
1110         entry = get_iface_entry(ifname);
1111         if (entry != NULL) {
1112                 IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n",
1113                          ifname, entry);
1114                 iface_check_stats_reset_and_adjust(net_dev, entry);
1115                 _iface_stat_set_active(entry, net_dev, true);
1116                 IF_DEBUG("qtaguid: %s(%s): "
1117                          "tracking now %d on ip=%pI4\n", __func__,
1118                          entry->ifname, true, &ipaddr);
1119                 goto done_unlock_put;
1120         }
1121
1122         new_iface = iface_alloc(net_dev);
1123         IF_DEBUG("qtaguid: iface_stat: create(%s): done "
1124                  "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr);
1125 done_unlock_put:
1126         spin_unlock_bh(&iface_stat_list_lock);
1127 done_put:
1128         if (in_dev)
1129                 in_dev_put(in_dev);
1130 }
1131
1132 static void iface_stat_create_ipv6(struct net_device *net_dev,
1133                                    struct inet6_ifaddr *ifa)
1134 {
1135         struct in_device *in_dev;
1136         const char *ifname;
1137         struct iface_stat *entry;
1138         struct iface_stat *new_iface;
1139         int addr_type;
1140
1141         IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n",
1142                  ifa, net_dev, net_dev ? net_dev->name : "");
1143         if (!net_dev) {
1144                 pr_err("qtaguid: iface_stat: create6(): no net dev!\n");
1145                 return;
1146         }
1147         ifname = net_dev->name;
1148
1149         in_dev = in_dev_get(net_dev);
1150         if (!in_dev) {
1151                 pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n",
1152                        ifname);
1153                 return;
1154         }
1155
1156         IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n",
1157                  ifname, in_dev);
1158
1159         if (!ifa) {
1160                 IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n",
1161                          ifname);
1162                 goto done_put;
1163         }
1164         addr_type = ipv6_addr_type(&ifa->addr);
1165
1166         spin_lock_bh(&iface_stat_list_lock);
1167         entry = get_iface_entry(ifname);
1168         if (entry != NULL) {
1169                 IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1170                          ifname, entry);
1171                 iface_check_stats_reset_and_adjust(net_dev, entry);
1172                 _iface_stat_set_active(entry, net_dev, true);
1173                 IF_DEBUG("qtaguid: %s(%s): "
1174                          "tracking now %d on ip=%pI6c\n", __func__,
1175                          entry->ifname, true, &ifa->addr);
1176                 goto done_unlock_put;
1177         }
1178
1179         new_iface = iface_alloc(net_dev);
1180         IF_DEBUG("qtaguid: iface_stat: create6(%s): done "
1181                  "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr);
1182
1183 done_unlock_put:
1184         spin_unlock_bh(&iface_stat_list_lock);
1185 done_put:
1186         in_dev_put(in_dev);
1187 }
1188
1189 static struct sock_tag *get_sock_stat_nl(const struct sock *sk)
1190 {
1191         MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk);
1192         return sock_tag_tree_search(&sock_tag_tree, sk);
1193 }
1194
1195 static struct sock_tag *get_sock_stat(const struct sock *sk)
1196 {
1197         struct sock_tag *sock_tag_entry;
1198         MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk);
1199         if (!sk)
1200                 return NULL;
1201         spin_lock_bh(&sock_tag_list_lock);
1202         sock_tag_entry = get_sock_stat_nl(sk);
1203         spin_unlock_bh(&sock_tag_list_lock);
1204         return sock_tag_entry;
1205 }
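/*
 * Hedged sketch, illustrative only: the _nl suffix marks the "no lock"
 * variant for callers that already hold sock_tag_list_lock and keep using the
 * entry under it; get_sock_stat() above is the self-locking convenience.
 * example_sock_is_tagged() is not a real helper in this file.
 */
static inline bool example_sock_is_tagged(const struct sock *sk)
{
        bool tagged;

        spin_lock_bh(&sock_tag_list_lock);
        tagged = get_sock_stat_nl(sk) != NULL;
        spin_unlock_bh(&sock_tag_list_lock);
        return tagged;
}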
1206
1207 static int ipx_proto(const struct sk_buff *skb,
1208                      struct xt_action_param *par)
1209 {
1210         int thoff = 0, tproto;
1211
1212         switch (par->family) {
1213         case NFPROTO_IPV6:
1214                 tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
1215                 if (tproto < 0)
1216                         MT_DEBUG("%s(): transport header not found in ipv6"
1217                                  " skb=%p\n", __func__, skb);
1218                 break;
1219         case NFPROTO_IPV4:
1220                 tproto = ip_hdr(skb)->protocol;
1221                 break;
1222         default:
1223                 tproto = IPPROTO_RAW;
1224         }
1225         return tproto;
1226 }
1227
1228 static void
1229 data_counters_update(struct data_counters *dc, int set,
1230                      enum ifs_tx_rx direction, int proto, int bytes)
1231 {
1232         switch (proto) {
1233         case IPPROTO_TCP:
1234                 dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1);
1235                 break;
1236         case IPPROTO_UDP:
1237                 dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1);
1238                 break;
1239         case IPPROTO_IP:
1240         default:
1241                 dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes,
1242                                     1);
1243                 break;
1244         }
1245 }
1246
1247 /*
1248  * Update stats for the specified interface. Do nothing if the entry
1249  * does not exist (when a device was never configured with an IP address).
1250  * Called when a device is being unregistered.
1251  */
1252 static void iface_stat_update(struct net_device *net_dev, bool stash_only)
1253 {
1254         struct rtnl_link_stats64 dev_stats, *stats;
1255         struct iface_stat *entry;
1256
1257         stats = dev_get_stats(net_dev, &dev_stats);
1258         spin_lock_bh(&iface_stat_list_lock);
1259         entry = get_iface_entry(net_dev->name);
1260         if (entry == NULL) {
1261                 IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n",
1262                          net_dev->name);
1263                 spin_unlock_bh(&iface_stat_list_lock);
1264                 return;
1265         }
1266
1267         IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1268                  net_dev->name, entry);
1269         if (!entry->active) {
1270                 IF_DEBUG("qtaguid: %s(%s): already disabled\n", __func__,
1271                          net_dev->name);
1272                 spin_unlock_bh(&iface_stat_list_lock);
1273                 return;
1274         }
1275
1276         if (stash_only) {
1277                 entry->last_known[IFS_TX].bytes = stats->tx_bytes;
1278                 entry->last_known[IFS_TX].packets = stats->tx_packets;
1279                 entry->last_known[IFS_RX].bytes = stats->rx_bytes;
1280                 entry->last_known[IFS_RX].packets = stats->rx_packets;
1281                 entry->last_known_valid = true;
1282                 IF_DEBUG("qtaguid: %s(%s): "
1283                          "dev stats stashed rx/tx=%llu/%llu\n", __func__,
1284                          net_dev->name, stats->rx_bytes, stats->tx_bytes);
1285                 spin_unlock_bh(&iface_stat_list_lock);
1286                 return;
1287         }
1288         entry->totals_via_dev[IFS_TX].bytes += stats->tx_bytes;
1289         entry->totals_via_dev[IFS_TX].packets += stats->tx_packets;
1290         entry->totals_via_dev[IFS_RX].bytes += stats->rx_bytes;
1291         entry->totals_via_dev[IFS_RX].packets += stats->rx_packets;
1292         /* We don't need the last_known[] anymore */
1293         entry->last_known_valid = false;
1294         _iface_stat_set_active(entry, net_dev, false);
1295         IF_DEBUG("qtaguid: %s(%s): "
1296                  "disable tracking. rx/tx=%llu/%llu\n", __func__,
1297                  net_dev->name, stats->rx_bytes, stats->tx_bytes);
1298         spin_unlock_bh(&iface_stat_list_lock);
1299 }
1300
1301 /*
1302  * Update stats for the specified interface from the skb.
1303  * Do nothing if the entry
1304  * does not exist (when a device was never configured with an IP address).
1305  * Called on each sk.
1306  */
1307 static void iface_stat_update_from_skb(const struct sk_buff *skb,
1308                                        struct xt_action_param *par)
1309 {
1310         struct iface_stat *entry;
1311         const struct net_device *el_dev;
1312         enum ifs_tx_rx direction = par->in ? IFS_RX : IFS_TX;
1313         int bytes = skb->len;
1314         int proto;
1315
1316         if (!skb->dev) {
1317                 MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
1318                 el_dev = par->in ? : par->out;
1319         } else {
1320                 const struct net_device *other_dev;
1321                 el_dev = skb->dev;
1322                 other_dev = par->in ? : par->out;
1323                 if (el_dev != other_dev) {
1324                         MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
1325                                  "par->(in/out)=%p %s\n",
1326                                  par->hooknum, el_dev, el_dev->name, other_dev,
1327                                  other_dev->name);
1328                 }
1329         }
1330
1331         if (unlikely(!el_dev)) {
1332                 pr_err_ratelimited("qtaguid[%d]: %s(): no par->in/out?!!\n",
1333                                    par->hooknum, __func__);
1334                 BUG();
1335         } else if (unlikely(!el_dev->name)) {
1336                 pr_err_ratelimited("qtaguid[%d]: %s(): no dev->name?!!\n",
1337                                    par->hooknum, __func__);
1338                 BUG();
1339         } else {
1340                 proto = ipx_proto(skb, par);
1341                 MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
1342                          par->hooknum, el_dev->name, el_dev->type,
1343                          par->family, proto);
1344         }
1345
1346         spin_lock_bh(&iface_stat_list_lock);
1347         entry = get_iface_entry(el_dev->name);
1348         if (entry == NULL) {
1349                 IF_DEBUG("qtaguid: iface_stat: %s(%s): not tracked\n",
1350                          __func__, el_dev->name);
1351                 spin_unlock_bh(&iface_stat_list_lock);
1352                 return;
1353         }
1354
1355         IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1356                  el_dev->name, entry);
1357
1358         data_counters_update(&entry->totals_via_skb, 0, direction, proto,
1359                              bytes);
1360         spin_unlock_bh(&iface_stat_list_lock);
1361 }
1362
1363 static void tag_stat_update(struct tag_stat *tag_entry,
1364                         enum ifs_tx_rx direction, int proto, int bytes)
1365 {
1366         int active_set;
1367         active_set = get_active_counter_set(tag_entry->tn.tag);
1368         MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d "
1369                  "dir=%d proto=%d bytes=%d)\n",
1370                  tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag),
1371                  active_set, direction, proto, bytes);
1372         data_counters_update(&tag_entry->counters, active_set, direction,
1373                              proto, bytes);
1374         if (tag_entry->parent_counters)
1375                 data_counters_update(tag_entry->parent_counters, active_set,
1376                                      direction, proto, bytes);
1377 }
1378
1379 /*
1380  * Create a new entry for tracking the specified {acct_tag,uid_tag} within
1381  * the interface.
1382  * iface_entry->tag_stat_list_lock should be held.
1383  */
1384 static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry,
1385                                            tag_t tag)
1386 {
1387         struct tag_stat *new_tag_stat_entry = NULL;
1388         IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx"
1389                  " (uid=%u)\n", __func__,
1390                  iface_entry, tag, get_uid_from_tag(tag));
1391         new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC);
1392         if (!new_tag_stat_entry) {
1393                 pr_err("qtaguid: iface_stat: tag stat alloc failed\n");
1394                 goto done;
1395         }
1396         new_tag_stat_entry->tn.tag = tag;
1397         tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree);
1398 done:
1399         return new_tag_stat_entry;
1400 }
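/*
 * Hedged sketch, illustrative only: the helpers used in if_tag_stat_update()
 * below (make_atag_from_value(), combine_atag_with_uid(), get_utag_from_tag(),
 * get_atag_from_tag()) come from xt_qtaguid_internal.h and are assumed to pack
 * a 64-bit tag_t with the accounting tag in the upper 32 bits and the uid in
 * the lower 32 bits. example_build_full_tag() is not a real helper.
 */
static inline tag_t example_build_full_tag(uint32_t acct_tag_value, uid_t uid)
{
        tag_t acct_tag = ((tag_t)acct_tag_value) << 32; /* assumed atag layout */

        return acct_tag | uid;  /* assumed combine_atag_with_uid() equivalent */
}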
1401
1402 static void if_tag_stat_update(const char *ifname, uid_t uid,
1403                                const struct sock *sk, enum ifs_tx_rx direction,
1404                                int proto, int bytes)
1405 {
1406         struct tag_stat *tag_stat_entry;
1407         tag_t tag, acct_tag;
1408         tag_t uid_tag;
1409         struct data_counters *uid_tag_counters;
1410         struct sock_tag *sock_tag_entry;
1411         struct iface_stat *iface_entry;
1412         struct tag_stat *new_tag_stat = NULL;
1413         MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s "
1414                 "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n",
1415                  ifname, uid, sk, direction, proto, bytes);
1416
1417
1418         iface_entry = get_iface_entry(ifname);
1419         if (!iface_entry) {
1420                 pr_err_ratelimited("qtaguid: iface_stat: stat_update() "
1421                                    "%s not found\n", ifname);
1422                 return;
1423         }
1424         /* It is ok to process data when an iface_entry is inactive */
1425
1426         MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n",
1427                  ifname, iface_entry);
1428
1429         /*
1430          * Look for a tagged sock.
1431          * It will have an acct_tag and the owning uid encoded in its tag.
1432          */
1433         sock_tag_entry = get_sock_stat(sk);
1434         if (sock_tag_entry) {
1435                 tag = sock_tag_entry->tag;
1436                 acct_tag = get_atag_from_tag(tag);
1437                 uid_tag = get_utag_from_tag(tag);
1438         } else {
1439                 acct_tag = make_atag_from_value(0);
1440                 tag = combine_atag_with_uid(acct_tag, uid);
1441                 uid_tag = make_tag_from_uid(uid);
1442         }
1443         MT_DEBUG("qtaguid: iface_stat: stat_update(): "
1444                  "looking for tag=0x%llx (uid=%u) in ife=%p\n",
1445                  tag, get_uid_from_tag(tag), iface_entry);
1446         /* Look up {acct_tag, uid_tag} in this interface's tag_stat tree */
1447         spin_lock_bh(&iface_entry->tag_stat_list_lock);
1448
1449         tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
1450                                               tag);
1451         if (tag_stat_entry) {
1452                 /*
1453                  * Updating the {acct_tag, uid_tag} entry handles both stats:
1454                  * {0, uid_tag} will also get updated.
1455                  */
1456                 tag_stat_update(tag_stat_entry, direction, proto, bytes);
1457                 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1458                 return;
1459         }
1460
1461         /* Look up the base {0, uid_tag} entry in this interface's tag_stat tree */
1462         tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
1463                                               uid_tag);
1464         if (!tag_stat_entry) {
1465                 /* Here: the base uid_tag did not exist */
1466                 /*
1467                  * No parent counters. So
1468                  *  - No {0, uid_tag} stats and no {acct_tag, uid_tag} stats.
1469                  */
1470                 new_tag_stat = create_if_tag_stat(iface_entry, uid_tag);
1471                 if (!new_tag_stat)
1472                         goto unlock;
1473                 uid_tag_counters = &new_tag_stat->counters;
1474         } else {
1475                 uid_tag_counters = &tag_stat_entry->counters;
1476         }
1477
1478         if (acct_tag) {
1479                 /* Create the child {acct_tag, uid_tag} and hook up parent. */
1480                 new_tag_stat = create_if_tag_stat(iface_entry, tag);
1481                 if (!new_tag_stat)
1482                         goto unlock;
1483                 new_tag_stat->parent_counters = uid_tag_counters;
1484         } else {
1485                 /*
1486                  * For new_tag_stat to be still NULL here would require:
1487                  *  {0, uid_tag} exists
1488                  *  and {acct_tag, uid_tag} doesn't exist
1489                  *  AND acct_tag == 0.
1490                  * Impossible. This reassures us that new_tag_stat
1491                  * below will always be assigned.
1492                  */
1493                 BUG_ON(!new_tag_stat);
1494         }
1495         tag_stat_update(new_tag_stat, direction, proto, bytes);
1496 unlock:
1497         spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1498 }
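/*
 * Worked example (assuming the tag encoding from xt_qtaguid_internal.h,
 * i.e. acct_tag in the upper 32 bits and uid in the lower 32 bits): a
 * socket tagged with acct_tag 0x100000000 by uid 10005 yields the full tag
 * 0x0000000100002715. The lookup above first tries that full tag; if only
 * the base {0, uid_tag} entry 0x0000000000002715 exists, it becomes the
 * parent and a new child entry is created for the full tag.
 */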
1499
1500 static int iface_netdev_event_handler(struct notifier_block *nb,
1501                                       unsigned long event, void *ptr) {
1502         struct net_device *dev = ptr;
1503
1504         if (unlikely(module_passive))
1505                 return NOTIFY_DONE;
1506
1507         IF_DEBUG("qtaguid: iface_stat: netdev_event(): "
1508                  "ev=0x%lx/%s netdev=%p->name=%s\n",
1509                  event, netdev_evt_str(event), dev, dev ? dev->name : "");
1510
1511         switch (event) {
1512         case NETDEV_UP:
1513                 iface_stat_create(dev, NULL);
1514                 atomic64_inc(&qtu_events.iface_events);
1515                 break;
1516         case NETDEV_DOWN:
1517         case NETDEV_UNREGISTER:
1518                 iface_stat_update(dev, event == NETDEV_DOWN);
1519                 atomic64_inc(&qtu_events.iface_events);
1520                 break;
1521         }
1522         return NOTIFY_DONE;
1523 }
1524
1525 static int iface_inet6addr_event_handler(struct notifier_block *nb,
1526                                          unsigned long event, void *ptr)
1527 {
1528         struct inet6_ifaddr *ifa = ptr;
1529         struct net_device *dev;
1530
1531         if (unlikely(module_passive))
1532                 return NOTIFY_DONE;
1533
1534         IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): "
1535                  "ev=0x%lx/%s ifa=%p\n",
1536                  event, netdev_evt_str(event), ifa);
1537
1538         switch (event) {
1539         case NETDEV_UP:
1540                 BUG_ON(!ifa || !ifa->idev);
1541                 dev = (struct net_device *)ifa->idev->dev;
1542                 iface_stat_create_ipv6(dev, ifa);
1543                 atomic64_inc(&qtu_events.iface_events);
1544                 break;
1545         case NETDEV_DOWN:
1546         case NETDEV_UNREGISTER:
1547                 BUG_ON(!ifa || !ifa->idev);
1548                 dev = (struct net_device *)ifa->idev->dev;
1549                 iface_stat_update(dev, event == NETDEV_DOWN);
1550                 atomic64_inc(&qtu_events.iface_events);
1551                 break;
1552         }
1553         return NOTIFY_DONE;
1554 }
1555
1556 static int iface_inetaddr_event_handler(struct notifier_block *nb,
1557                                         unsigned long event, void *ptr)
1558 {
1559         struct in_ifaddr *ifa = ptr;
1560         struct net_device *dev;
1561
1562         if (unlikely(module_passive))
1563                 return NOTIFY_DONE;
1564
1565         IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): "
1566                  "ev=0x%lx/%s ifa=%p\n",
1567                  event, netdev_evt_str(event), ifa);
1568
1569         switch (event) {
1570         case NETDEV_UP:
1571                 BUG_ON(!ifa || !ifa->ifa_dev);
1572                 dev = ifa->ifa_dev->dev;
1573                 iface_stat_create(dev, ifa);
1574                 atomic64_inc(&qtu_events.iface_events);
1575                 break;
1576         case NETDEV_DOWN:
1577         case NETDEV_UNREGISTER:
1578                 BUG_ON(!ifa || !ifa->ifa_dev);
1579                 dev = ifa->ifa_dev->dev;
1580                 iface_stat_update(dev, event == NETDEV_DOWN);
1581                 atomic64_inc(&qtu_events.iface_events);
1582                 break;
1583         }
1584         return NOTIFY_DONE;
1585 }
1586
1587 static struct notifier_block iface_netdev_notifier_blk = {
1588         .notifier_call = iface_netdev_event_handler,
1589 };
1590
1591 static struct notifier_block iface_inetaddr_notifier_blk = {
1592         .notifier_call = iface_inetaddr_event_handler,
1593 };
1594
1595 static struct notifier_block iface_inet6addr_notifier_blk = {
1596         .notifier_call = iface_inet6addr_event_handler,
1597 };
1598
1599 static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
1600 {
1601         int err;
1602
1603         iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir);
1604         if (!iface_stat_procdir) {
1605                 pr_err("qtaguid: iface_stat: init failed to create proc entry\n");
1606                 err = -1;
1607                 goto err;
1608         }
1609
1610         iface_stat_all_procfile = create_proc_entry(iface_stat_all_procfilename,
1611                                                     proc_iface_perms,
1612                                                     parent_procdir);
1613         if (!iface_stat_all_procfile) {
1614                 pr_err("qtaguid: iface_stat: init "
1615                        "failed to create stat_old proc entry\n");
1616                 err = -1;
1617                 goto err_zap_entry;
1618         }
1619         iface_stat_all_procfile->read_proc = iface_stat_fmt_proc_read;
1620         iface_stat_all_procfile->data = (void *)1; /* fmt1 */
1621
1622         iface_stat_fmt_procfile = create_proc_entry(iface_stat_fmt_procfilename,
1623                                                     proc_iface_perms,
1624                                                     parent_procdir);
1625         if (!iface_stat_fmt_procfile) {
1626                 pr_err("qtaguid: iface_stat: init "
1627                        "failed to create stat_all proc entry\n");
1628                 err = -1;
1629                 goto err_zap_all_stats_entry;
1630         }
1631         iface_stat_fmt_procfile->read_proc = iface_stat_fmt_proc_read;
1632         iface_stat_fmt_procfile->data = (void *)2; /* fmt2 */
1633
1634
1635         err = register_netdevice_notifier(&iface_netdev_notifier_blk);
1636         if (err) {
1637                 pr_err("qtaguid: iface_stat: init "
1638                        "failed to register dev event handler\n");
1639                 goto err_zap_all_stats_entries;
1640         }
1641         err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1642         if (err) {
1643                 pr_err("qtaguid: iface_stat: init "
1644                        "failed to register ipv4 dev event handler\n");
1645                 goto err_unreg_nd;
1646         }
1647
1648         err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk);
1649         if (err) {
1650                 pr_err("qtaguid: iface_stat: init "
1651                        "failed to register ipv6 dev event handler\n");
1652                 goto err_unreg_ip4_addr;
1653         }
1654         return 0;
1655
1656 err_unreg_ip4_addr:
1657         unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1658 err_unreg_nd:
1659         unregister_netdevice_notifier(&iface_netdev_notifier_blk);
1660 err_zap_all_stats_entries:
1661         remove_proc_entry(iface_stat_fmt_procfilename, parent_procdir);
1662 err_zap_all_stats_entry:
1663         remove_proc_entry(iface_stat_all_procfilename, parent_procdir);
1664 err_zap_entry:
1665         remove_proc_entry(iface_stat_procdirname, parent_procdir);
1666 err:
1667         return err;
1668 }
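/*
 * The error unwinding above mirrors the setup order: each err_* label only
 * undoes the steps that had already succeeded, in reverse order of the
 * proc entry creation and notifier registration.
 */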
1669
1670 static struct sock *qtaguid_find_sk(const struct sk_buff *skb,
1671                                     struct xt_action_param *par)
1672 {
1673         struct sock *sk;
1674         unsigned int hook_mask = (1 << par->hooknum);
1675
1676         MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb,
1677                  par->hooknum, par->family);
1678
1679         /*
1680          * Let's not abuse the xt_socket_get*_sk(), or else it will
1681          * return garbage SKs.
1682          */
1683         if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS))
1684                 return NULL;
1685
1686         switch (par->family) {
1687         case NFPROTO_IPV6:
1688                 sk = xt_socket_get6_sk(skb, par);
1689                 break;
1690         case NFPROTO_IPV4:
1691                 sk = xt_socket_get4_sk(skb, par);
1692                 break;
1693         default:
1694                 return NULL;
1695         }
1696
1697         if (sk) {
1698                 MT_DEBUG("qtaguid: %p->sk_proto=%u "
1699                          "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state);
1700                 /*
1701                  * When in TCP_TIME_WAIT the sk is not a "struct sock" but
1702                  * "struct inet_timewait_sock" which is missing fields.
1703                  */
1704                 if (sk->sk_state  == TCP_TIME_WAIT) {
1705                         xt_socket_put_sk(sk);
1706                         sk = NULL;
1707                 }
1708         }
1709         return sk;
1710 }
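/*
 * Dropping TCP_TIME_WAIT sockets here matters: xt_socket_get*_sk() can hand
 * back a "struct inet_timewait_sock", which is a much smaller object with no
 * sk_socket/file to derive a uid from, so treating it as a full struct sock
 * would read garbage. The same TCP_TIME_WAIT check is applied to skb->sk in
 * qtaguid_mt() below.
 */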
1711
1712 static void account_for_uid(const struct sk_buff *skb,
1713                             const struct sock *alternate_sk, uid_t uid,
1714                             struct xt_action_param *par)
1715 {
1716         const struct net_device *el_dev;
1717
1718         if (!skb->dev) {
1719                 MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
1720                 el_dev = par->in ? : par->out;
1721         } else {
1722                 const struct net_device *other_dev;
1723                 el_dev = skb->dev;
1724                 other_dev = par->in ? : par->out;
1725                 if (el_dev != other_dev) {
1726                         MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
1727                                 "par->(in/out)=%p %s\n",
1728                                 par->hooknum, el_dev, el_dev->name, other_dev,
1729                                 other_dev->name);
1730                 }
1731         }
1732
1733         if (unlikely(!el_dev)) {
1734                 pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum);
1735         } else if (unlikely(!el_dev->name)) {
1736                 pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum);
1737         } else {
1738                 int proto = ipx_proto(skb, par);
1739                 MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
1740                          par->hooknum, el_dev->name, el_dev->type,
1741                          par->family, proto);
1742
1743                 if_tag_stat_update(el_dev->name, uid,
1744                                 skb->sk ? skb->sk : alternate_sk,
1745                                 par->in ? IFS_RX : IFS_TX,
1746                                 proto, skb->len);
1747         }
1748 }
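/*
 * account_for_uid() charges skb->len against the interface named by el_dev
 * (preferring skb->dev, falling back to par->in/par->out) and treats the
 * packet as RX when par->in is set, TX otherwise. The sock passed down is
 * only used as a key to find an existing socket tag; alternate_sk lets the
 * caller supply a sock located via connection tracking when skb->sk is gone.
 */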
1749
1750 static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)
1751 {
1752         const struct xt_qtaguid_match_info *info = par->matchinfo;
1753         const struct file *filp;
1754         bool got_sock = false;
1755         struct sock *sk;
1756         uid_t sock_uid;
1757         bool res;
1758
1759         if (unlikely(module_passive))
1760                 return (info->match ^ info->invert) == 0;
1761
1762         MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n",
1763                  par->hooknum, skb, par->in, par->out, par->family);
1764
1765         atomic64_inc(&qtu_events.match_calls);
1766         if (skb == NULL) {
1767                 res = (info->match ^ info->invert) == 0;
1768                 goto ret_res;
1769         }
1770
1771         switch (par->hooknum) {
1772         case NF_INET_PRE_ROUTING:
1773         case NF_INET_POST_ROUTING:
1774                 atomic64_inc(&qtu_events.match_calls_prepost);
1775                 iface_stat_update_from_skb(skb, par);
1776                 /*
1777                  * We are done in pre/post. The skb will get processed
1778                  * further later.
1779                  */
1780                 res = (info->match ^ info->invert);
1781                 goto ret_res;
1782                 break;
1783         /* default: Fall through and do UID related work */
1784         }
1785
1786         sk = skb->sk;
1787         /*
1788          * When in TCP_TIME_WAIT the sk is not a "struct sock" but
1789          * "struct inet_timewait_sock" which is missing fields.
1790          * So we ignore it.
1791          */
1792         if (sk && sk->sk_state == TCP_TIME_WAIT)
1793                 sk = NULL;
1794         if (sk == NULL) {
1795                 /*
1796                  * A missing sk->sk_socket happens when packets are in-flight
1797                  * and the matching socket is already closed and gone.
1798                  */
1799                 sk = qtaguid_find_sk(skb, par);
1800                 /*
1801                  * If we got the socket from the find_sk(), we will need to put
1802                  * it back, as nf_tproxy_get_sock_v4() got it.
1803                  */
1804                 got_sock = sk;
1805                 if (sk)
1806                         atomic64_inc(&qtu_events.match_found_sk_in_ct);
1807                 else
1808                         atomic64_inc(&qtu_events.match_found_no_sk_in_ct);
1809         } else {
1810                 atomic64_inc(&qtu_events.match_found_sk);
1811         }
1812         MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d fam=%d proto=%d\n",
1813                  par->hooknum, sk, got_sock, par->family, ipx_proto(skb, par));
1814         if (sk != NULL) {
1815                 MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n",
1816                         par->hooknum, sk, sk->sk_socket,
1817                         sk->sk_socket ? sk->sk_socket->file : (void *)-1LL);
1818                 filp = sk->sk_socket ? sk->sk_socket->file : NULL;
1819                 MT_DEBUG("qtaguid[%d]: filp...uid=%u\n",
1820                         par->hooknum, filp ? filp->f_cred->fsuid : -1);
1821         }
1822
1823         if (sk == NULL || sk->sk_socket == NULL) {
1824                 /*
1825                  * Here, the qtaguid_find_sk() using connection tracking
1826                  * couldn't find the owner, so for now we just count them
1827                  * against the system.
1828                  */
1829                 /*
1830                  * TODO: unhack how to force just accounting.
1831                  * For now we only do iface stats when the uid-owner is not
1832                  * requested.
1833                  */
1834                 if (!(info->match & XT_QTAGUID_UID))
1835                         account_for_uid(skb, sk, 0, par);
1836                 MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n",
1837                         par->hooknum,
1838                         sk ? sk->sk_socket : NULL);
1839                 res = (info->match ^ info->invert) == 0;
1840                 atomic64_inc(&qtu_events.match_no_sk);
1841                 goto put_sock_ret_res;
1842         } else if (info->match & info->invert & XT_QTAGUID_SOCKET) {
1843                 res = false;
1844                 goto put_sock_ret_res;
1845         }
1846         filp = sk->sk_socket->file;
1847         if (filp == NULL) {
1848                 MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum);
1849                 account_for_uid(skb, sk, 0, par);
1850                 res = ((info->match ^ info->invert) &
1851                         (XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0;
1852                 atomic64_inc(&qtu_events.match_no_sk_file);
1853                 goto put_sock_ret_res;
1854         }
1855         sock_uid = filp->f_cred->fsuid;
1856         /*
1857          * TODO: unhack how to force just accounting.
1858          * For now we only do iface stats when the uid-owner is not requested
1859          */
1860         if (!(info->match & XT_QTAGUID_UID))
1861                 account_for_uid(skb, sk, sock_uid, par);
1862
1863         /*
1864          * The following two tests fail the match when:
1865          *    id not in range AND no inverted condition requested
1866          * or id     in range AND    inverted condition requested
1867          * Thus (!a && b) || (a && !b) == a ^ b
1868          */
1869         if (info->match & XT_QTAGUID_UID)
1870                 if ((filp->f_cred->fsuid >= info->uid_min &&
1871                      filp->f_cred->fsuid <= info->uid_max) ^
1872                     !(info->invert & XT_QTAGUID_UID)) {
1873                         MT_DEBUG("qtaguid[%d]: leaving uid not matching\n",
1874                                  par->hooknum);
1875                         res = false;
1876                         goto put_sock_ret_res;
1877                 }
1878         if (info->match & XT_QTAGUID_GID)
1879                 if ((filp->f_cred->fsgid >= info->gid_min &&
1880                                 filp->f_cred->fsgid <= info->gid_max) ^
1881                         !(info->invert & XT_QTAGUID_GID)) {
1882                         MT_DEBUG("qtaguid[%d]: leaving gid not matching\n",
1883                                 par->hooknum);
1884                         res = false;
1885                         goto put_sock_ret_res;
1886                 }
1887
1888         MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum);
1889         res = true;
1890
1891 put_sock_ret_res:
1892         if (got_sock)
1893                 xt_socket_put_sk(sk);
1894 ret_res:
1895         MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res);
1896         return res;
1897 }
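/*
 * qtaguid_mt() deliberately ignores skb->sk while it is in TCP_TIME_WAIT and
 * falls back to qtaguid_find_sk(); neither path will treat an
 * inet_timewait_sock as a full struct sock. Packets whose owner cannot be
 * determined are only accounted (against uid 0) when no uid-owner condition
 * was requested, and the uid/gid conditions are then treated as not
 * satisfied unless they were inverted.
 */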
1898
1899 #ifdef DDEBUG
1900 /* This function is not in xt_qtaguid_print.c because of locks visibility */
1901 static void prdebug_full_state(int indent_level, const char *fmt, ...)
1902 {
1903         va_list args;
1904         char *fmt_buff;
1905         char *buff;
1906
1907         if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
1908                 return;
1909
1910         fmt_buff = kasprintf(GFP_ATOMIC,
1911                              "qtaguid: %s(): %s {\n", __func__, fmt);
1912         BUG_ON(!fmt_buff);
1913         va_start(args, fmt);
1914         buff = kvasprintf(GFP_ATOMIC,
1915                           fmt_buff, args);
1916         BUG_ON(!buff);
1917         pr_debug("%s", buff);
1918         kfree(fmt_buff);
1919         kfree(buff);
1920         va_end(args);
1921
1922         spin_lock_bh(&sock_tag_list_lock);
1923         prdebug_sock_tag_tree(indent_level, &sock_tag_tree);
1924         spin_unlock_bh(&sock_tag_list_lock);
1925
1926         spin_lock_bh(&sock_tag_list_lock);
1927         spin_lock_bh(&uid_tag_data_tree_lock);
1928         prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree);
1929         prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree);
1930         spin_unlock_bh(&uid_tag_data_tree_lock);
1931         spin_unlock_bh(&sock_tag_list_lock);
1932
1933         spin_lock_bh(&iface_stat_list_lock);
1934         prdebug_iface_stat_list(indent_level, &iface_stat_list);
1935         spin_unlock_bh(&iface_stat_list_lock);
1936
1937         pr_debug("qtaguid: %s(): }\n", __func__);
1938 }
1939 #else
1940 static void prdebug_full_state(int indent_level, const char *fmt, ...) {}
1941 #endif
1942
1943 /*
1944  * Procfs reader to get all active socket tags using style "1)" as described in
1945  * fs/proc/generic.c
1946  */
1947 static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned,
1948                                   off_t items_to_skip, int char_count, int *eof,
1949                                   void *data)
1950 {
1951         char *outp = page;
1952         int len;
1953         uid_t uid;
1954         struct rb_node *node;
1955         struct sock_tag *sock_tag_entry;
1956         int item_index = 0;
1957         int indent_level = 0;
1958         long f_count;
1959
1960         if (unlikely(module_passive)) {
1961                 *eof = 1;
1962                 return 0;
1963         }
1964
1965         if (*eof)
1966                 return 0;
1967
1968         CT_DEBUG("qtaguid: proc ctrl pid=%u tgid=%u uid=%u "
1969                  "page=%p off=%ld char_count=%d *eof=%d\n",
1970                  current->pid, current->tgid, current_fsuid(),
1971                  page, items_to_skip, char_count, *eof);
1972
1973         spin_lock_bh(&sock_tag_list_lock);
1974         for (node = rb_first(&sock_tag_tree);
1975              node;
1976              node = rb_next(node)) {
1977                 if (item_index++ < items_to_skip)
1978                         continue;
1979                 sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
1980                 uid = get_uid_from_tag(sock_tag_entry->tag);
1981                 CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) "
1982                          "pid=%u\n",
1983                          sock_tag_entry->sk,
1984                          sock_tag_entry->tag,
1985                          uid,
1986                          sock_tag_entry->pid
1987                         );
1988                 f_count = atomic_long_read(
1989                         &sock_tag_entry->socket->file->f_count);
1990                 len = snprintf(outp, char_count,
1991                                "sock=%p tag=0x%llx (uid=%u) pid=%u "
1992                                "f_count=%lu\n",
1993                                sock_tag_entry->sk,
1994                                sock_tag_entry->tag, uid,
1995                                sock_tag_entry->pid, f_count);
1996                 if (len >= char_count) {
1997                         spin_unlock_bh(&sock_tag_list_lock);
1998                         *outp = '\0';
1999                         return outp - page;
2000                 }
2001                 outp += len;
2002                 char_count -= len;
2003                 (*num_items_returned)++;
2004         }
2005         spin_unlock_bh(&sock_tag_list_lock);
2006
2007         if (item_index++ >= items_to_skip) {
2008                 len = snprintf(outp, char_count,
2009                                "events: sockets_tagged=%llu "
2010                                "sockets_untagged=%llu "
2011                                "counter_set_changes=%llu "
2012                                "delete_cmds=%llu "
2013                                "iface_events=%llu "
2014                                "match_calls=%llu "
2015                                "match_calls_prepost=%llu "
2016                                "match_found_sk=%llu "
2017                                "match_found_sk_in_ct=%llu "
2018                                "match_found_no_sk_in_ct=%llu "
2019                                "match_no_sk=%llu "
2020                                "match_no_sk_file=%llu\n",
2021                                atomic64_read(&qtu_events.sockets_tagged),
2022                                atomic64_read(&qtu_events.sockets_untagged),
2023                                atomic64_read(&qtu_events.counter_set_changes),
2024                                atomic64_read(&qtu_events.delete_cmds),
2025                                atomic64_read(&qtu_events.iface_events),
2026                                atomic64_read(&qtu_events.match_calls),
2027                                atomic64_read(&qtu_events.match_calls_prepost),
2028                                atomic64_read(&qtu_events.match_found_sk),
2029                                atomic64_read(&qtu_events.match_found_sk_in_ct),
2030                                atomic64_read(
2031                                        &qtu_events.match_found_no_sk_in_ct),
2032                                atomic64_read(&qtu_events.match_no_sk),
2033                                atomic64_read(&qtu_events.match_no_sk_file));
2034                 if (len >= char_count) {
2035                         *outp = '\0';
2036                         return outp - page;
2037                 }
2038                 outp += len;
2039                 char_count -= len;
2040                 (*num_items_returned)++;
2041         }
2042
2043         /* Count the following as part of the last item_index */
2044         if (item_index > items_to_skip) {
2045                 prdebug_full_state(indent_level, "proc ctrl");
2046         }
2047
2048         *eof = 1;
2049         return outp - page;
2050 }
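/*
 * A read of the ctrl proc file therefore yields one line per tagged socket,
 * e.g. (illustrative values only):
 *   sock=ffff880012345678 tag=0x100002715 (uid=10005) pid=1234 f_count=3
 * followed by a single "events: ..." summary line built from the atomic64
 * counters listed above.
 */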
2051
2052 /*
2053  * Delete socket tags, and stat tags associated with a given
2054  * accounting tag and uid.
2055  */
2056 static int ctrl_cmd_delete(const char *input)
2057 {
2058         char cmd;
2059         uid_t uid;
2060         uid_t entry_uid;
2061         tag_t acct_tag;
2062         tag_t tag;
2063         int res, argc;
2064         struct iface_stat *iface_entry;
2065         struct rb_node *node;
2066         struct sock_tag *st_entry;
2067         struct rb_root st_to_free_tree = RB_ROOT;
2068         struct tag_stat *ts_entry;
2069         struct tag_counter_set *tcs_entry;
2070         struct tag_ref *tr_entry;
2071         struct uid_tag_data *utd_entry;
2072
2073         argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid);
2074         CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c "
2075                  "user_tag=0x%llx uid=%u\n", input, argc, cmd,
2076                  acct_tag, uid);
2077         if (argc < 2) {
2078                 res = -EINVAL;
2079                 goto err;
2080         }
2081         if (!valid_atag(acct_tag)) {
2082                 pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input);
2083                 res = -EINVAL;
2084                 goto err;
2085         }
2086         if (argc < 3) {
2087                 uid = current_fsuid();
2088         } else if (!can_impersonate_uid(uid)) {
2089                 pr_info("qtaguid: ctrl_delete(%s): "
2090                         "insufficient priv from pid=%u tgid=%u uid=%u\n",
2091                         input, current->pid, current->tgid, current_fsuid());
2092                 res = -EPERM;
2093                 goto err;
2094         }
2095
2096         tag = combine_atag_with_uid(acct_tag, uid);
2097         CT_DEBUG("qtaguid: ctrl_delete(%s): "
2098                  "looking for tag=0x%llx (uid=%u)\n",
2099                  input, tag, uid);
2100
2101         /* Delete socket tags */
2102         spin_lock_bh(&sock_tag_list_lock);
2103         node = rb_first(&sock_tag_tree);
2104         while (node) {
2105                 st_entry = rb_entry(node, struct sock_tag, sock_node);
2106                 entry_uid = get_uid_from_tag(st_entry->tag);
2107                 node = rb_next(node);
2108                 if (entry_uid != uid)
2109                         continue;
2110
2111                 CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n",
2112                          input, st_entry->tag, entry_uid);
2113
2114                 if (!acct_tag || st_entry->tag == tag) {
2115                         rb_erase(&st_entry->sock_node, &sock_tag_tree);
2116                         /* Can't sockfd_put() within spinlock, do it later. */
2117                         sock_tag_tree_insert(st_entry, &st_to_free_tree);
2118                         tr_entry = lookup_tag_ref(st_entry->tag, NULL);
2119                         BUG_ON(tr_entry->num_sock_tags <= 0);
2120                         tr_entry->num_sock_tags--;
2121                         /*
2122                          * TODO: remove if, and start failing.
2123                          * This is a hack to work around the fact that in some
2124                          * places we have "if (IS_ERR_OR_NULL(pqd_entry))"
2125                          * and are trying to work around apps
2126                          * that didn't open the /dev/xt_qtaguid.
2127                          */
2128                         if (st_entry->list.next && st_entry->list.prev)
2129                                 list_del(&st_entry->list);
2130                 }
2131         }
2132         spin_unlock_bh(&sock_tag_list_lock);
2133
2134         sock_tag_tree_erase(&st_to_free_tree);
2135
2136         /* Delete tag counter-sets */
2137         spin_lock_bh(&tag_counter_set_list_lock);
2138         /* Counter sets are only on the uid tag, not full tag */
2139         tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
2140         if (tcs_entry) {
2141                 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2142                          "erase tcs: tag=0x%llx (uid=%u) set=%d\n",
2143                          input,
2144                          tcs_entry->tn.tag,
2145                          get_uid_from_tag(tcs_entry->tn.tag),
2146                          tcs_entry->active_set);
2147                 rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree);
2148                 kfree(tcs_entry);
2149         }
2150         spin_unlock_bh(&tag_counter_set_list_lock);
2151
2152         /*
2153          * If acct_tag is 0, then all entries belonging to uid are
2154          * erased.
2155          */
2156         spin_lock_bh(&iface_stat_list_lock);
2157         list_for_each_entry(iface_entry, &iface_stat_list, list) {
2158                 spin_lock_bh(&iface_entry->tag_stat_list_lock);
2159                 node = rb_first(&iface_entry->tag_stat_tree);
2160                 while (node) {
2161                         ts_entry = rb_entry(node, struct tag_stat, tn.node);
2162                         entry_uid = get_uid_from_tag(ts_entry->tn.tag);
2163                         node = rb_next(node);
2164
2165                         CT_DEBUG("qtaguid: ctrl_delete(%s): "
2166                                  "ts tag=0x%llx (uid=%u)\n",
2167                                  input, ts_entry->tn.tag, entry_uid);
2168
2169                         if (entry_uid != uid)
2170                                 continue;
2171                         if (!acct_tag || ts_entry->tn.tag == tag) {
2172                                 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2173                                          "erase ts: %s 0x%llx %u\n",
2174                                          input, iface_entry->ifname,
2175                                          get_atag_from_tag(ts_entry->tn.tag),
2176                                          entry_uid);
2177                                 rb_erase(&ts_entry->tn.node,
2178                                          &iface_entry->tag_stat_tree);
2179                                 kfree(ts_entry);
2180                         }
2181                 }
2182                 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
2183         }
2184         spin_unlock_bh(&iface_stat_list_lock);
2185
2186         /* Cleanup the uid_tag_data */
2187         spin_lock_bh(&uid_tag_data_tree_lock);
2188         node = rb_first(&uid_tag_data_tree);
2189         while (node) {
2190                 utd_entry = rb_entry(node, struct uid_tag_data, node);
2191                 entry_uid = utd_entry->uid;
2192                 node = rb_next(node);
2193
2194                 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2195                          "utd uid=%u\n",
2196                          input, entry_uid);
2197
2198                 if (entry_uid != uid)
2199                         continue;
2200                 /*
2201                  * Go over the tag_refs; those that no longer have
2202                  * sock_tags using them are freed.
2203                  */
2204                 put_tag_ref_tree(tag, utd_entry);
2205                 put_utd_entry(utd_entry);
2206         }
2207         spin_unlock_bh(&uid_tag_data_tree_lock);
2208
2209         atomic64_inc(&qtu_events.delete_cmds);
2210         res = 0;
2211
2212 err:
2213         return res;
2214 }
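/*
 * Example (illustrative): writing "d 0 10005" to the ctrl file removes all
 * socket tags, the counter set and the per-interface stats belonging to uid
 * 10005, while a non-zero acct_tag limits the deletion to entries carrying
 * that tag. Note that the acct_tag is parsed with %llu, so it is expected
 * in decimal here.
 */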
2215
2216 static int ctrl_cmd_counter_set(const char *input)
2217 {
2218         char cmd;
2219         uid_t uid = 0;
2220         tag_t tag;
2221         int res, argc;
2222         struct tag_counter_set *tcs;
2223         int counter_set;
2224
2225         argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid);
2226         CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c "
2227                  "set=%d uid=%u\n", input, argc, cmd,
2228                  counter_set, uid);
2229         if (argc != 3) {
2230                 res = -EINVAL;
2231                 goto err;
2232         }
2233         if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) {
2234                 pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n",
2235                         input);
2236                 res = -EINVAL;
2237                 goto err;
2238         }
2239         if (!can_manipulate_uids()) {
2240                 pr_info("qtaguid: ctrl_counterset(%s): "
2241                         "insufficient priv from pid=%u tgid=%u uid=%u\n",
2242                         input, current->pid, current->tgid, current_fsuid());
2243                 res = -EPERM;
2244                 goto err;
2245         }
2246
2247         tag = make_tag_from_uid(uid);
2248         spin_lock_bh(&tag_counter_set_list_lock);
2249         tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
2250         if (!tcs) {
2251                 tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC);
2252                 if (!tcs) {
2253                         spin_unlock_bh(&tag_counter_set_list_lock);
2254                         pr_err("qtaguid: ctrl_counterset(%s): "
2255                                "failed to alloc counter set\n",
2256                                input);
2257                         res = -ENOMEM;
2258                         goto err;
2259                 }
2260                 tcs->tn.tag = tag;
2261                 tag_counter_set_tree_insert(tcs, &tag_counter_set_tree);
2262                 CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx "
2263                          "(uid=%u) set=%d\n",
2264                          input, tag, get_uid_from_tag(tag), counter_set);
2265         }
2266         tcs->active_set = counter_set;
2267         spin_unlock_bh(&tag_counter_set_list_lock);
2268         atomic64_inc(&qtu_events.counter_set_changes);
2269         res = 0;
2270
2271 err:
2272         return res;
2273 }
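/*
 * Example (illustrative): writing "s 1 10005" makes counter set 1 the active
 * set for uid 10005, so subsequent traffic for that uid accumulates there
 * until the set is switched again. Android conventionally uses this to split
 * foreground from background traffic, but the module itself only checks that
 * the set is within [0, IFS_MAX_COUNTER_SETS).
 */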
2274
2275 static int ctrl_cmd_tag(const char *input)
2276 {
2277         char cmd;
2278         int sock_fd = 0;
2279         uid_t uid = 0;
2280         tag_t acct_tag = make_atag_from_value(0);
2281         tag_t full_tag;
2282         struct socket *el_socket;
2283         int res, argc;
2284         struct sock_tag *sock_tag_entry;
2285         struct tag_ref *tag_ref_entry;
2286         struct uid_tag_data *uid_tag_data_entry;
2287         struct proc_qtu_data *pqd_entry;
2288
2289         /* Unassigned args will get defaulted later. */
2290         argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid);
2291         CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d "
2292                  "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd,
2293                  acct_tag, uid);
2294         if (argc < 2) {
2295                 res = -EINVAL;
2296                 goto err;
2297         }
2298         el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
2299         if (!el_socket) {
2300                 pr_info("qtaguid: ctrl_tag(%s): failed to lookup"
2301                         " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
2302                         input, sock_fd, res, current->pid, current->tgid,
2303                         current_fsuid());
2304                 goto err;
2305         }
2306         CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n",
2307                  input, atomic_long_read(&el_socket->file->f_count),
2308                  el_socket->sk);
2309         if (argc < 3) {
2310                 acct_tag = make_atag_from_value(0);
2311         } else if (!valid_atag(acct_tag)) {
2312                 pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input);
2313                 res = -EINVAL;
2314                 goto err_put;
2315         }
2316         CT_DEBUG("qtaguid: ctrl_tag(%s): "
2317                  "pid=%u tgid=%u uid=%u euid=%u fsuid=%u "
2318                  "ctrl.gid=%u in_group()=%d in_egroup()=%d\n",
2319                  input, current->pid, current->tgid, current_uid(),
2320                  current_euid(), current_fsuid(),
2321                  xt_qtaguid_ctrl_file->gid,
2322                  in_group_p(xt_qtaguid_ctrl_file->gid),
2323                  in_egroup_p(xt_qtaguid_ctrl_file->gid));
2324         if (argc < 4) {
2325                 uid = current_fsuid();
2326         } else if (!can_impersonate_uid(uid)) {
2327                 pr_info("qtaguid: ctrl_tag(%s): "
2328                         "insufficient priv from pid=%u tgid=%u uid=%u\n",
2329                         input, current->pid, current->tgid, current_fsuid());
2330                 res = -EPERM;
2331                 goto err_put;
2332         }
2333         full_tag = combine_atag_with_uid(acct_tag, uid);
2334
2335         spin_lock_bh(&sock_tag_list_lock);
2336         sock_tag_entry = get_sock_stat_nl(el_socket->sk);
2337         tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry);
2338         if (IS_ERR(tag_ref_entry)) {
2339                 res = PTR_ERR(tag_ref_entry);
2340                 spin_unlock_bh(&sock_tag_list_lock);
2341                 goto err_put;
2342         }
2343         tag_ref_entry->num_sock_tags++;
2344         if (sock_tag_entry) {
2345                 struct tag_ref *prev_tag_ref_entry;
2346
2347                 CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p "
2348                          "st@%p ...->f_count=%ld\n",
2349                          input, el_socket->sk, sock_tag_entry,
2350                          atomic_long_read(&el_socket->file->f_count));
2351                 /*
2352                  * This is a re-tagging, so release the sock_fd that was
2353                  * locked at the time of the 1st tagging.
2354                  * There is still the ref from this call's sockfd_lookup() so
2355                  * it can be done within the spinlock.
2356                  */
2357                 sockfd_put(sock_tag_entry->socket);
2358                 prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag,
2359                                                     &uid_tag_data_entry);
2360                 BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry));
2361                 BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0);
2362                 prev_tag_ref_entry->num_sock_tags--;
2363                 sock_tag_entry->tag = full_tag;
2364         } else {
2365                 CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n",
2366                          input, el_socket->sk);
2367                 sock_tag_entry = kzalloc(sizeof(*sock_tag_entry),
2368                                          GFP_ATOMIC);
2369                 if (!sock_tag_entry) {
2370                         pr_err("qtaguid: ctrl_tag(%s): "
2371                                "socket tag alloc failed\n",
2372                                input);
2373                         spin_unlock_bh(&sock_tag_list_lock);
2374                         res = -ENOMEM;
2375                         goto err_tag_unref_put;
2376                 }
2377                 sock_tag_entry->sk = el_socket->sk;
2378                 sock_tag_entry->socket = el_socket;
2379                 sock_tag_entry->pid = current->tgid;
2380                 sock_tag_entry->tag = combine_atag_with_uid(acct_tag,
2381                                                             uid);
2382                 spin_lock_bh(&uid_tag_data_tree_lock);
2383                 pqd_entry = proc_qtu_data_tree_search(
2384                         &proc_qtu_data_tree, current->tgid);
2385                 /*
2386                  * TODO: remove if, and start failing.
2387                  * At first, we want to catch user-space code that is not
2388                  * opening the /dev/xt_qtaguid.
2389                  */
2390                 if (IS_ERR_OR_NULL(pqd_entry))
2391                         pr_warn_once(
2392                                 "qtaguid: %s(): "
2393                                 "User space forgot to open /dev/xt_qtaguid? "
2394                                 "pid=%u tgid=%u uid=%u\n", __func__,
2395                                 current->pid, current->tgid,
2396                                 current_fsuid());
2397                 else
2398                         list_add(&sock_tag_entry->list,
2399                                  &pqd_entry->sock_tag_list);
2400                 spin_unlock_bh(&uid_tag_data_tree_lock);
2401
2402                 sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree);
2403                 atomic64_inc(&qtu_events.sockets_tagged);
2404         }
2405         spin_unlock_bh(&sock_tag_list_lock);
2406         /* We keep the ref to the socket (file) until it is untagged */
2407         CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->f_count=%ld\n",
2408                  input, sock_tag_entry,
2409                  atomic_long_read(&el_socket->file->f_count));
2410         return 0;
2411
2412 err_tag_unref_put:
2413         BUG_ON(tag_ref_entry->num_sock_tags <= 0);
2414         tag_ref_entry->num_sock_tags--;
2415         free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry);
2416 err_put:
2417         CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->f_count=%ld\n",
2418                  input, atomic_long_read(&el_socket->file->f_count) - 1);
2419         /* Release the sock_fd that was grabbed by sockfd_lookup(). */
2420         sockfd_put(el_socket);
2421         return res;
2422
2423 err:
2424         CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input);
2425         return res;
2426 }
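/*
 * Example (illustrative, assuming acct_tag values occupy the upper 32 bits
 * of the 64-bit tag as the rest of this file implies): writing
 * "t 12 4294967296 10005" tags the socket behind the caller's fd 12 with
 * acct_tag 0x100000000 on behalf of uid 10005. Tagging for another uid needs
 * impersonation privileges; otherwise the caller's fsuid is used. Re-tagging
 * an already tagged socket drops the file reference taken by the first tag,
 * so at most one reference is held per tagged socket.
 */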
2427
2428 static int ctrl_cmd_untag(const char *input)
2429 {
2430         char cmd;
2431         int sock_fd = 0;
2432         struct socket *el_socket;
2433         int res, argc;
2434         struct sock_tag *sock_tag_entry;
2435         struct tag_ref *tag_ref_entry;
2436         struct uid_tag_data *utd_entry;
2437         struct proc_qtu_data *pqd_entry;
2438
2439         argc = sscanf(input, "%c %d", &cmd, &sock_fd);
2440         CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n",
2441                  input, argc, cmd, sock_fd);
2442         if (argc < 2) {
2443                 res = -EINVAL;
2444                 goto err;
2445         }
2446         el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
2447         if (!el_socket) {
2448                 pr_info("qtaguid: ctrl_untag(%s): failed to lookup"
2449                         " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
2450                         input, sock_fd, res, current->pid, current->tgid,
2451                         current_fsuid());
2452                 goto err;
2453         }
2454         CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n",
2455                  input, atomic_long_read(&el_socket->file->f_count),
2456                  el_socket->sk);
2457         spin_lock_bh(&sock_tag_list_lock);
2458         sock_tag_entry = get_sock_stat_nl(el_socket->sk);
2459         if (!sock_tag_entry) {
2460                 spin_unlock_bh(&sock_tag_list_lock);
2461                 res = -EINVAL;
2462                 goto err_put;
2463         }
2464         /*
2465          * The socket already belongs to the current process
2466          * so it can do whatever it wants to it.
2467          */
2468         rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree);
2469
2470         tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, &utd_entry);
2471         BUG_ON(!tag_ref_entry);
2472         BUG_ON(tag_ref_entry->num_sock_tags <= 0);
2473         spin_lock_bh(&uid_tag_data_tree_lock);
2474         pqd_entry = proc_qtu_data_tree_search(
2475                 &proc_qtu_data_tree, current->tgid);
2476         /*
2477          * TODO: remove if, and start failing.
2478          * At first, we want to catch user-space code that is not
2479          * opening the /dev/xt_qtaguid.
2480          */
2481         if (IS_ERR_OR_NULL(pqd_entry))
2482                 pr_warn_once("qtaguid: %s(): "
2483                              "User space forgot to open /dev/xt_qtaguid? "
2484                              "pid=%u tgid=%u uid=%u\n", __func__,
2485                              current->pid, current->tgid, current_fsuid());
2486         else
2487                 list_del(&sock_tag_entry->list);
2488         spin_unlock_bh(&uid_tag_data_tree_lock);
2489         /*
2490          * We don't free tag_ref from the utd_entry here,
2491          * only during a cmd_delete().
2492          */
2493         tag_ref_entry->num_sock_tags--;
2494         spin_unlock_bh(&sock_tag_list_lock);
2495         /*
2496          * Release the sock_fd that was grabbed at tag time,
2497          * and once more for the sockfd_lookup() here.
2498          */
2499         sockfd_put(sock_tag_entry->socket);
2500         CT_DEBUG("qtaguid: ctrl_untag(%s): done. st@%p ...->f_count=%ld\n",
2501                  input, sock_tag_entry,
2502                  atomic_long_read(&el_socket->file->f_count) - 1);
2503         sockfd_put(el_socket);
2504
2505         kfree(sock_tag_entry);
2506         atomic64_inc(&qtu_events.sockets_untagged);
2507
2508         return 0;
2509
2510 err_put:
2511         CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%ld\n",
2512                  input, atomic_long_read(&el_socket->file->f_count) - 1);
2513         /* Release the sock_fd that was grabbed by sockfd_lookup(). */
2514         sockfd_put(el_socket);
2515         return res;
2516
2517 err:
2518         CT_DEBUG("qtaguid: ctrl_untag(%s): done.\n", input);
2519         return res;
2520 }
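/*
 * Example (illustrative): writing "u 12" untags the socket behind the
 * caller's fd 12. This drops both the reference taken at tag time and the
 * one taken by this call's sockfd_lookup(), and frees the sock_tag entry.
 * The tag_ref itself is only reclaimed later by a "d" (delete) command.
 */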
2521
2522 static int qtaguid_ctrl_parse(const char *input, int count)
2523 {
2524         char cmd;
2525         int res;
2526
2527         CT_DEBUG("qtaguid: ctrl(%s): pid=%u tgid=%u uid=%u\n",
2528                  input, current->pid, current->tgid, current_fsuid());
2529
2530         cmd = input[0];
2531         /* Collect params for commands */
2532         switch (cmd) {
2533         case 'd':
2534                 res = ctrl_cmd_delete(input);
2535                 break;
2536
2537         case 's':
2538                 res = ctrl_cmd_counter_set(input);
2539                 break;
2540
2541         case 't':
2542                 res = ctrl_cmd_tag(input);
2543                 break;
2544
2545         case 'u':
2546                 res = ctrl_cmd_untag(input);
2547                 break;
2548
2549         default:
2550                 res = -EINVAL;
2551                 goto err;
2552         }
2553         if (!res)
2554                 res = count;
2555 err:
2556         CT_DEBUG("qtaguid: ctrl(%s): res=%d\n", input, res);
2557         return res;
2558 }
2559
2560 #define MAX_QTAGUID_CTRL_INPUT_LEN 255
2561 static int qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer,
2562                         unsigned long count, void *data)
2563 {
2564         char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN];
2565
2566         if (unlikely(module_passive))
2567                 return count;
2568
2569         if (count >= MAX_QTAGUID_CTRL_INPUT_LEN)
2570                 return -EINVAL;
2571
2572         if (copy_from_user(input_buf, buffer, count))
2573                 return -EFAULT;
2574
2575         input_buf[count] = '\0';
2576         return qtaguid_ctrl_parse(input_buf, count);
2577 }
2578
2579 struct proc_print_info {
2580         char *outp;
2581         char **num_items_returned;
2582         struct iface_stat *iface_entry;
2583         struct tag_stat *ts_entry;
2584         int item_index;
2585         int items_to_skip;
2586         int char_count;
2587 };
2588
2589 static int pp_stats_line(struct proc_print_info *ppi, int cnt_set)
2590 {
2591         int len;
2592         struct data_counters *cnts;
2593
2594         if (!ppi->item_index) {
2595                 if (ppi->item_index++ < ppi->items_to_skip)
2596                         return 0;
2597                 len = snprintf(ppi->outp, ppi->char_count,
2598                                "idx iface acct_tag_hex uid_tag_int cnt_set "
2599                                "rx_bytes rx_packets "
2600                                "tx_bytes tx_packets "
2601                                "rx_tcp_bytes rx_tcp_packets "
2602                                "rx_udp_bytes rx_udp_packets "
2603                                "rx_other_bytes rx_other_packets "
2604                                "tx_tcp_bytes tx_tcp_packets "
2605                                "tx_udp_bytes tx_udp_packets "
2606                                "tx_other_bytes tx_other_packets\n");
2607         } else {
2608                 tag_t tag = ppi->ts_entry->tn.tag;
2609                 uid_t stat_uid = get_uid_from_tag(tag);
2610                 /* Detailed tags are not available to everybody */
2611                 if (get_atag_from_tag(tag)
2612                     && !can_read_other_uid_stats(stat_uid)) {
2613                         CT_DEBUG("qtaguid: stats line: "
2614                                  "%s 0x%llx %u: insufficient priv "
2615                                  "from pid=%u tgid=%u uid=%u stats.gid=%u\n",
2616                                  ppi->iface_entry->ifname,
2617                                  get_atag_from_tag(tag), stat_uid,
2618                                  current->pid, current->tgid, current_fsuid(),
2619                                  xt_qtaguid_stats_file->gid);
2620                         return 0;
2621                 }
2622                 if (ppi->item_index++ < ppi->items_to_skip)
2623                         return 0;
2624                 cnts = &ppi->ts_entry->counters;
2625                 len = snprintf(
2626                         ppi->outp, ppi->char_count,
2627                         "%d %s 0x%llx %u %u "
2628                         "%llu %llu "
2629                         "%llu %llu "
2630                         "%llu %llu "
2631                         "%llu %llu "
2632                         "%llu %llu "
2633                         "%llu %llu "
2634                         "%llu %llu "
2635                         "%llu %llu\n",
2636                         ppi->item_index,
2637                         ppi->iface_entry->ifname,
2638                         get_atag_from_tag(tag),
2639                         stat_uid,
2640                         cnt_set,
2641                         dc_sum_bytes(cnts, cnt_set, IFS_RX),
2642                         dc_sum_packets(cnts, cnt_set, IFS_RX),
2643                         dc_sum_bytes(cnts, cnt_set, IFS_TX),
2644                         dc_sum_packets(cnts, cnt_set, IFS_TX),
2645                         cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
2646                         cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
2647                         cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
2648                         cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
2649                         cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
2650                         cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
2651                         cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
2652                         cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
2653                         cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
2654                         cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
2655                         cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
2656                         cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
2657         }
2658         return len;
2659 }
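/*
 * Illustrative stats line produced by the format above (values made up),
 * lining up with the header printed for item_index 0:
 *   2 wlan0 0x100000000 10005 0 1000 10 2000 20 800 8 100 1 100 1 1600 16 300 3 100 1
 * i.e. idx, ifname, acct_tag in hex, uid, counter set, then total rx/tx
 * bytes and packets, followed by per-protocol (tcp/udp/other) counts.
 */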
2660
2661 static bool pp_sets(struct proc_print_info *ppi)
2662 {
2663         int len;
2664         int counter_set;
2665         for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS;
2666              counter_set++) {
2667                 len = pp_stats_line(ppi, counter_set);
2668                 if (len >= ppi->char_count) {
2669                         *ppi->outp = '\0';
2670                         return false;
2671                 }
2672                 if (len) {
2673                         ppi->outp += len;
2674                         ppi->char_count -= len;
2675                         (*ppi->num_items_returned)++;
2676                 }
2677         }
2678         return true;
2679 }
2680
2681 /*
2682  * Procfs reader to get all tag stats using style "1)" as described in
2683  * fs/proc/generic.c
2684  * Groups all protocols tx/rx bytes.
2685  */
2686 static int qtaguid_stats_proc_read(char *page, char **num_items_returned,
2687                                 off_t items_to_skip, int char_count, int *eof,
2688                                 void *data)
2689 {
2690         struct proc_print_info ppi;
2691         int len;
2692
2693         ppi.outp = page;
2694         ppi.item_index = 0;
2695         ppi.char_count = char_count;
2696         ppi.num_items_returned = num_items_returned;
2697         ppi.items_to_skip = items_to_skip;
2698
2699         if (unlikely(module_passive)) {
2700                 len = pp_stats_line(&ppi, 0);
2701                 /* The header should always be shorter than the buffer. */
2702                 BUG_ON(len >= ppi.char_count);
2703                 (*num_items_returned)++;
2704                 *eof = 1;
2705                 return len;
2706         }
2707
2708         CT_DEBUG("qtaguid:proc stats pid=%u tgid=%u uid=%u "
2709                  "page=%p *num_items_returned=%p off=%ld "
2710                  "char_count=%d *eof=%d\n",
2711                  current->pid, current->tgid, current_fsuid(),
2712                  page, *num_items_returned,
2713                  items_to_skip, char_count, *eof);
2714
2715         if (*eof)
2716                 return 0;
2717
2718         /* The idx is there to help debug when things go belly up. */
2719         len = pp_stats_line(&ppi, 0);
2720         /* Don't advance the outp unless the whole line was printed */
2721         if (len >= ppi.char_count) {
2722                 *ppi.outp = '\0';
2723                 return ppi.outp - page;
2724         }
2725         if (len) {
2726                 ppi.outp += len;
2727                 ppi.char_count -= len;
2728                 (*num_items_returned)++;
2729         }
2730
2731         spin_lock_bh(&iface_stat_list_lock);
2732         list_for_each_entry(ppi.iface_entry, &iface_stat_list, list) {
2733                 struct rb_node *node;
2734                 spin_lock_bh(&ppi.iface_entry->tag_stat_list_lock);
2735                 for (node = rb_first(&ppi.iface_entry->tag_stat_tree);
2736                      node;
2737                      node = rb_next(node)) {
2738                         ppi.ts_entry = rb_entry(node, struct tag_stat, tn.node);
2739                         if (!pp_sets(&ppi)) {
2740                                 spin_unlock_bh(
2741                                         &ppi.iface_entry->tag_stat_list_lock);
2742                                 spin_unlock_bh(&iface_stat_list_lock);
2743                                 return ppi.outp - page;
2744                         }
2745                 }
2746                 spin_unlock_bh(&ppi.iface_entry->tag_stat_list_lock);
2747         }
2748         spin_unlock_bh(&iface_stat_list_lock);
2749
2750         *eof = 1;
2751         return ppi.outp - page;
2752 }
2753
2754 /*------------------------------------------*/
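/*
 * Overview (summarizing the code below): a process opens the QTU_DEV_NAME
 * misc device once, which creates a proc_qtu_data entry keyed by its tgid.
 * Socket tags created by that process are linked on pqd_entry->sock_tag_list
 * so that qtudev_release() can untag and free them if the process exits
 * without cleaning up after itself.
 */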
2755 static int qtudev_open(struct inode *inode, struct file *file)
2756 {
2757         struct uid_tag_data *utd_entry;
2758         struct proc_qtu_data  *pqd_entry;
2759         struct proc_qtu_data  *new_pqd_entry;
2760         int res;
2761         bool utd_entry_found;
2762
2763         if (unlikely(qtu_proc_handling_passive))
2764                 return 0;
2765
2766         DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n",
2767                  current->pid, current->tgid, current_fsuid());
2768
2769         spin_lock_bh(&uid_tag_data_tree_lock);
2770
2771         /* Look for existing uid data, or alloc one. */
2772         utd_entry = get_uid_data(current_fsuid(), &utd_entry_found);
2773         if (IS_ERR_OR_NULL(utd_entry)) {
2774                 res = PTR_ERR(utd_entry);
2775                 goto err_unlock;
2776         }
2777
2778         /* Look for existing PID based proc_data */
2779         pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree,
2780                                               current->tgid);
2781         if (pqd_entry) {
2782                 pr_err("qtaguid: qtudev_open(): %u/%u %u "
2783                        "%s already opened\n",
2784                        current->pid, current->tgid, current_fsuid(),
2785                        QTU_DEV_NAME);
2786                 res = -EBUSY;
2787                 goto err_unlock_free_utd;
2788         }
2789
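        /*
         * GFP_ATOMIC because uid_tag_data_tree_lock is held (spin_lock_bh
         * above), so this allocation must not sleep.
         */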
2790         new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC);
2791         if (!new_pqd_entry) {
2792                 pr_err("qtaguid: qtudev_open(): %u/%u %u: "
2793                        "proc data alloc failed\n",
2794                        current->pid, current->tgid, current_fsuid());
2795                 res = -ENOMEM;
2796                 goto err_unlock_free_utd;
2797         }
2798         new_pqd_entry->pid = current->tgid;
2799         INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list);
2800         new_pqd_entry->parent_tag_data = utd_entry;
2801         utd_entry->num_pqd++;
2802
2803         proc_qtu_data_tree_insert(new_pqd_entry,
2804                                   &proc_qtu_data_tree);
2805
2806         spin_unlock_bh(&uid_tag_data_tree_lock);
2807         DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n",
2808                  current_fsuid(), new_pqd_entry);
2809         file->private_data = new_pqd_entry;
2810         return 0;
2811
2812 err_unlock_free_utd:
2813         if (!utd_entry_found) {
2814                 rb_erase(&utd_entry->node, &uid_tag_data_tree);
2815                 kfree(utd_entry);
2816         }
2817 err_unlock:
2818         spin_unlock_bh(&uid_tag_data_tree_lock);
2819         return res;
2820 }
2821
2822 static int qtudev_release(struct inode *inode, struct file *file)
2823 {
2824         struct proc_qtu_data  *pqd_entry = file->private_data;
2825         struct uid_tag_data  *utd_entry = pqd_entry->parent_tag_data;
2826         struct sock_tag *st_entry;
2827         struct rb_root st_to_free_tree = RB_ROOT;
2828         struct list_head *entry, *next;
2829         struct tag_ref *tr;
2830
2831         if (unlikely(qtu_proc_handling_passive))
2832                 return 0;
2833
2834         /*
2835          * Do not trust current->pid here; it might just be a kworker
2836          * cleaning up after a dead process.
2837          */
2838         DR_DEBUG("qtaguid: qtudev_release(): "
2839                  "pid=%u tgid=%u uid=%u "
2840                  "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n",
2841                  current->pid, current->tgid, pqd_entry->parent_tag_data->uid,
2842                  pqd_entry, pqd_entry->pid, utd_entry,
2843                  utd_entry->num_active_tags);
2844
2845         spin_lock_bh(&sock_tag_list_lock);
2846         spin_lock_bh(&uid_tag_data_tree_lock);
2847
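        /*
         * Untag every socket this process still had tagged: drop its tag
         * reference, unlink the sock_tag from the global tree and from this
         * pqd's list, and queue it on st_to_free_tree so it can be released
         * once the spinlocks are dropped (sock_tag_tree_erase() below).
         */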
2848         list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) {
2849                 st_entry = list_entry(entry, struct sock_tag, list);
2850                 DR_DEBUG("qtaguid: %s(): "
2851                          "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n",
2852                          __func__,
2853                          st_entry, st_entry->sk,
2854                          current->pid, current->tgid,
2855                          pqd_entry->parent_tag_data->uid);
2856
2857                 utd_entry = uid_tag_data_tree_search(
2858                         &uid_tag_data_tree,
2859                         get_uid_from_tag(st_entry->tag));
2860                 BUG_ON(IS_ERR_OR_NULL(utd_entry));
2861                 DR_DEBUG("qtaguid: %s(): "
2862                          "looking for tag=0x%llx in utd_entry=%p\n", __func__,
2863                          st_entry->tag, utd_entry);
2864                 tr = tag_ref_tree_search(&utd_entry->tag_ref_tree,
2865                                          st_entry->tag);
2866                 BUG_ON(!tr);
2867                 BUG_ON(tr->num_sock_tags <= 0);
2868                 tr->num_sock_tags--;
2869                 free_tag_ref_from_utd_entry(tr, utd_entry);
2870
2871                 rb_erase(&st_entry->sock_node, &sock_tag_tree);
2872                 list_del(&st_entry->list);
2873                 /* Can't sockfd_put() within spinlock, do it later. */
2874                 sock_tag_tree_insert(st_entry, &st_to_free_tree);
2875
2876                 /*
2877                  * Try to free the utd_entry if no other proc_qtu_data is
2878                  * using it (num_pqd is 0) and it doesn't have active tags
2879                  * (num_active_tags is 0).
2880                  */
2881                 put_utd_entry(utd_entry);
2882         }
2883
2884         rb_erase(&pqd_entry->node, &proc_qtu_data_tree);
2885         BUG_ON(pqd_entry->parent_tag_data->num_pqd < 1);
2886         pqd_entry->parent_tag_data->num_pqd--;
2887         put_utd_entry(pqd_entry->parent_tag_data);
2888         kfree(pqd_entry);
2889         file->private_data = NULL;
2890
2891         spin_unlock_bh(&uid_tag_data_tree_lock);
2892         spin_unlock_bh(&sock_tag_list_lock);
2893
2894
2895         sock_tag_tree_erase(&st_to_free_tree);
2896
2897         prdebug_full_state(0, "%s(): pid=%u tgid=%u", __func__,
2898                            current->pid, current->tgid);
2899         return 0;
2900 }
2901
2902 /*------------------------------------------*/
2903 static const struct file_operations qtudev_fops = {
2904         .owner = THIS_MODULE,
2905         .open = qtudev_open,
2906         .release = qtudev_release,
2907 };
2908
2909 static struct miscdevice qtu_device = {
2910         .minor = MISC_DYNAMIC_MINOR,
2911         .name = QTU_DEV_NAME,
2912         .fops = &qtudev_fops,
2913         /* Sadly there is no way to set a default mode, e.g. .mode = S_IRUGO | S_IWUSR */
2914 };
2915
2916 /*------------------------------------------*/
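/*
 * With the default module_procdirname this creates /proc/net/xt_qtaguid/
 * containing the "ctrl" and "stats" entries, wired to the read/write
 * handlers above.
 */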
2917 static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir)
2918 {
2919         int ret;
2920         *res_procdir = proc_mkdir(module_procdirname, init_net.proc_net);
2921         if (!*res_procdir) {
2922                 pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n");
2923                 ret = -ENOMEM;
2924                 goto no_dir;
2925         }
2926
2927         xt_qtaguid_ctrl_file = create_proc_entry("ctrl", proc_ctrl_perms,
2928                                                 *res_procdir);
2929         if (!xt_qtaguid_ctrl_file) {
2930                 pr_err("qtaguid: failed to create xt_qtaguid/ctrl "
2931                         "file\n");
2932                 ret = -ENOMEM;
2933                 goto no_ctrl_entry;
2934         }
2935         xt_qtaguid_ctrl_file->read_proc = qtaguid_ctrl_proc_read;
2936         xt_qtaguid_ctrl_file->write_proc = qtaguid_ctrl_proc_write;
2937
2938         xt_qtaguid_stats_file = create_proc_entry("stats", proc_stats_perms,
2939                                                 *res_procdir);
2940         if (!xt_qtaguid_stats_file) {
2941                 pr_err("qtaguid: failed to create xt_qtaguid/stats "
2942                         "file\n");
2943                 ret = -ENOMEM;
2944                 goto no_stats_entry;
2945         }
2946         xt_qtaguid_stats_file->read_proc = qtaguid_stats_proc_read;
2947         /*
2948          * TODO: add support for counter hacking, e.g.:
2949          * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write;
2950          */
2951         return 0;
2952
2953 no_stats_entry:
2954         remove_proc_entry("ctrl", *res_procdir);
2955 no_ctrl_entry:
2956         remove_proc_entry("xt_qtaguid", NULL);
2957 no_dir:
2958         return ret;
2959 }
2960
2961 static struct xt_match qtaguid_mt_reg __read_mostly = {
2962         /*
2963          * This module masquerades as the "owner" module so that iptables
2964          * tools can deal with it.
2965          */
2966         .name       = "owner",
2967         .revision   = 1,
2968         .family     = NFPROTO_UNSPEC,
2969         .match      = qtaguid_mt,
2970         .matchsize  = sizeof(struct xt_qtaguid_match_info),
2971         .me         = THIS_MODULE,
2972 };
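/*
 * Since the match registers under the name "owner" (revision 1, any family),
 * stock iptables/ip6tables userspace can install rules against it, e.g.
 * (illustrative only, assuming a tagged uid of 10001):
 *   iptables -A OUTPUT -m owner --uid-owner 10001 -j REJECT
 */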
2973
2974 static int __init qtaguid_mt_init(void)
2975 {
2976         if (qtaguid_proc_register(&xt_qtaguid_procdir)
2977             || iface_stat_init(xt_qtaguid_procdir)
2978             || xt_register_match(&qtaguid_mt_reg)
2979             || misc_register(&qtu_device))
2980                 return -1;
2981         return 0;
2982 }
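/*
 * Note: if any of the registration steps above fails, the earlier ones are
 * not unwound and a bare -1 is returned instead of the underlying errno;
 * the module is only meant to be built in (see the TODO below).
 */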
2983
2984 /*
2985  * TODO: allow unloading of the module.
2986  * For now the stats are permanent.
2987  * Kconfig forces 'y'/'n' and never allows 'm'.
2988  */
2989
2990 module_init(qtaguid_mt_init);
2991 MODULE_AUTHOR("jpa <jpa@google.com>");
2992 MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats");
2993 MODULE_LICENSE("GPL");
2994 MODULE_ALIAS("ipt_owner");
2995 MODULE_ALIAS("ip6t_owner");
2996 MODULE_ALIAS("ipt_qtaguid");
2997 MODULE_ALIAS("ip6t_qtaguid");