netfilter: xt_qtaguid: fix memory leak in seq_file handlers
[linux-3.10.git] / net / netfilter / xt_qtaguid.c
1 /*
2  * Kernel iptables module to track stats for packets based on user tags.
3  *
4  * (C) 2011 Google, Inc
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10
11 /*
12  * There are run-time debug flags enabled via the debug_mask module param, or
13  * via the DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h.
14  */
15 #define DEBUG
16
17 #include <linux/file.h>
18 #include <linux/inetdevice.h>
19 #include <linux/module.h>
20 #include <linux/netfilter/x_tables.h>
21 #include <linux/netfilter/xt_qtaguid.h>
22 #include <linux/ratelimit.h>
23 #include <linux/seq_file.h>
24 #include <linux/skbuff.h>
25 #include <linux/workqueue.h>
26 #include <net/addrconf.h>
27 #include <net/sock.h>
28 #include <net/tcp.h>
29 #include <net/udp.h>
30
31 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
32 #include <linux/netfilter_ipv6/ip6_tables.h>
33 #endif
34
35 #include <linux/netfilter/xt_socket.h>
36 #include "xt_qtaguid_internal.h"
37 #include "xt_qtaguid_print.h"
38 #include "../../fs/proc/internal.h"
39
40 /*
41  * We only use the xt_socket funcs within a similar context to avoid unexpected
42  * return values.
43  */
44 #define XT_SOCKET_SUPPORTED_HOOKS \
45         ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN))
46
47
/* Name of this module's directory under /proc/net/ and its dentry. */
static const char *module_procdirname = "xt_qtaguid";
static struct proc_dir_entry *xt_qtaguid_procdir;

/* Permissions for the per-interface files under iface_stat/<dev>/. */
static unsigned int proc_iface_perms = S_IRUGO;
module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR);

static struct proc_dir_entry *xt_qtaguid_stats_file;
static unsigned int proc_stats_perms = S_IRUGO;
module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR);

static struct proc_dir_entry *xt_qtaguid_ctrl_file;

/* Everybody can write. But proc_ctrl_write_limited is true by default which
 * limits what can be controlled. See the can_*() functions.
 */
static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO;
module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR);

/* Limited by default, so the gid of the ctrl and stats proc entries
 * will limit what can be done. See the can_*() functions.
 */
static bool proc_stats_readall_limited = true;
static bool proc_ctrl_write_limited = true;

module_param_named(stats_readall_limited, proc_stats_readall_limited, bool,
                   S_IRUGO | S_IWUSR);
module_param_named(ctrl_write_limited, proc_ctrl_write_limited, bool,
                   S_IRUGO | S_IWUSR);

/*
 * Limit the number of active tags (via socket tags) for a given UID.
 * Multiple processes could share the UID.  Enforced in new_tag_ref().
 */
static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS;
module_param(max_sock_tags, int, S_IRUGO | S_IWUSR);

/*
 * After the kernel has initialized this module, it is still possible
 * to make it passive.
 * Setting passive to Y:
 *  - the iface stats handling will not act on notifications.
 *  - iptables matches will never match.
 *  - ctrl commands silently succeed.
 *  - stats are always empty.
 * This is mostly useful when a bug is suspected.
 */
static bool module_passive;
module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR);

/*
 * Control how qtaguid data is tracked per proc/uid.
 * Setting tag_tracking_passive to Y:
 *  - don't create proc specific structs to track tags
 *  - don't check that active tag stats exceed some limits.
 *  - don't clean up socket tags on process exits.
 * This is mostly useful when a bug is suspected.
 */
static bool qtu_proc_handling_passive;
module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool,
                   S_IRUGO | S_IWUSR);

/* Name of the misc device userspace opens to register with this module. */
#define QTU_DEV_NAME "xt_qtaguid"

/* Run-time debug category mask; see xt_qtaguid_internal.h for the bits. */
uint qtaguid_debug_mask = DEFAULT_DEBUG_MASK;
module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR);
/*---------------------------------------------------------------------------*/
static const char *iface_stat_procdirname = "iface_stat";
static struct proc_dir_entry *iface_stat_procdir;
/*
 * The iface_stat_all* will go away once userspace gets used to the new fields
 * that have a format line.
 */
static const char *iface_stat_all_procfilename = "iface_stat_all";
static struct proc_dir_entry *iface_stat_all_procfile;
static const char *iface_stat_fmt_procfilename = "iface_stat_fmt";
static struct proc_dir_entry *iface_stat_fmt_procfile;


/* All known interfaces; entries are never removed (see iface_alloc). */
static LIST_HEAD(iface_stat_list);
static DEFINE_SPINLOCK(iface_stat_list_lock);

/* Tagged sockets, keyed by struct sock pointer. */
static struct rb_root sock_tag_tree = RB_ROOT;
static DEFINE_SPINLOCK(sock_tag_list_lock);

/* Per-tag active counter-set selection, keyed by uid-tag. */
static struct rb_root tag_counter_set_tree = RB_ROOT;
static DEFINE_SPINLOCK(tag_counter_set_list_lock);

/* Per-UID tag bookkeeping, keyed by uid. */
static struct rb_root uid_tag_data_tree = RB_ROOT;
static DEFINE_SPINLOCK(uid_tag_data_tree_lock);

/* Per-process registration data, keyed by pid. */
static struct rb_root proc_qtu_data_tree = RB_ROOT;
/* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */

/* Global event counters, exported for debugging via procfs. */
static struct qtaguid_event_counts qtu_events;
144 static bool can_manipulate_uids(void)
145 {
146         /* root pwnd */
147         return in_egroup_p(xt_qtaguid_ctrl_file->gid)
148                 || unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_limited)
149                 || unlikely(current_fsuid() == xt_qtaguid_ctrl_file->uid);
150 }
151
152 static bool can_impersonate_uid(uid_t uid)
153 {
154         return uid == current_fsuid() || can_manipulate_uids();
155 }
156
157 static bool can_read_other_uid_stats(uid_t uid)
158 {
159         /* root pwnd */
160         return in_egroup_p(xt_qtaguid_stats_file->gid)
161                 || unlikely(!current_fsuid()) || uid == current_fsuid()
162                 || unlikely(!proc_stats_readall_limited)
163                 || unlikely(current_fsuid() == xt_qtaguid_ctrl_file->uid);
164 }
165
166 static inline void dc_add_byte_packets(struct data_counters *counters, int set,
167                                   enum ifs_tx_rx direction,
168                                   enum ifs_proto ifs_proto,
169                                   int bytes,
170                                   int packets)
171 {
172         counters->bpc[set][direction][ifs_proto].bytes += bytes;
173         counters->bpc[set][direction][ifs_proto].packets += packets;
174 }
175
176 static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag)
177 {
178         struct rb_node *node = root->rb_node;
179
180         while (node) {
181                 struct tag_node *data = rb_entry(node, struct tag_node, node);
182                 int result;
183                 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
184                          " node=%p data=%p\n", tag, node, data);
185                 result = tag_compare(tag, data->tag);
186                 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
187                          " data.tag=0x%llx (uid=%u) res=%d\n",
188                          tag, data->tag, get_uid_from_tag(data->tag), result);
189                 if (result < 0)
190                         node = node->rb_left;
191                 else if (result > 0)
192                         node = node->rb_right;
193                 else
194                         return data;
195         }
196         return NULL;
197 }
198
199 static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root)
200 {
201         struct rb_node **new = &(root->rb_node), *parent = NULL;
202
203         /* Figure out where to put new node */
204         while (*new) {
205                 struct tag_node *this = rb_entry(*new, struct tag_node,
206                                                  node);
207                 int result = tag_compare(data->tag, this->tag);
208                 RB_DEBUG("qtaguid: %s(): tag=0x%llx"
209                          " (uid=%u)\n", __func__,
210                          this->tag,
211                          get_uid_from_tag(this->tag));
212                 parent = *new;
213                 if (result < 0)
214                         new = &((*new)->rb_left);
215                 else if (result > 0)
216                         new = &((*new)->rb_right);
217                 else
218                         BUG();
219         }
220
221         /* Add new node and rebalance tree. */
222         rb_link_node(&data->node, parent, new);
223         rb_insert_color(&data->node, root);
224 }
225
226 static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root)
227 {
228         tag_node_tree_insert(&data->tn, root);
229 }
230
231 static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag)
232 {
233         struct tag_node *node = tag_node_tree_search(root, tag);
234         if (!node)
235                 return NULL;
236         return rb_entry(&node->node, struct tag_stat, tn.node);
237 }
238
239 static void tag_counter_set_tree_insert(struct tag_counter_set *data,
240                                         struct rb_root *root)
241 {
242         tag_node_tree_insert(&data->tn, root);
243 }
244
245 static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root,
246                                                            tag_t tag)
247 {
248         struct tag_node *node = tag_node_tree_search(root, tag);
249         if (!node)
250                 return NULL;
251         return rb_entry(&node->node, struct tag_counter_set, tn.node);
252
253 }
254
255 static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root)
256 {
257         tag_node_tree_insert(&data->tn, root);
258 }
259
260 static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag)
261 {
262         struct tag_node *node = tag_node_tree_search(root, tag);
263         if (!node)
264                 return NULL;
265         return rb_entry(&node->node, struct tag_ref, tn.node);
266 }
267
268 static struct sock_tag *sock_tag_tree_search(struct rb_root *root,
269                                              const struct sock *sk)
270 {
271         struct rb_node *node = root->rb_node;
272
273         while (node) {
274                 struct sock_tag *data = rb_entry(node, struct sock_tag,
275                                                  sock_node);
276                 if (sk < data->sk)
277                         node = node->rb_left;
278                 else if (sk > data->sk)
279                         node = node->rb_right;
280                 else
281                         return data;
282         }
283         return NULL;
284 }
285
286 static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root)
287 {
288         struct rb_node **new = &(root->rb_node), *parent = NULL;
289
290         /* Figure out where to put new node */
291         while (*new) {
292                 struct sock_tag *this = rb_entry(*new, struct sock_tag,
293                                                  sock_node);
294                 parent = *new;
295                 if (data->sk < this->sk)
296                         new = &((*new)->rb_left);
297                 else if (data->sk > this->sk)
298                         new = &((*new)->rb_right);
299                 else
300                         BUG();
301         }
302
303         /* Add new node and rebalance tree. */
304         rb_link_node(&data->sock_node, parent, new);
305         rb_insert_color(&data->sock_node, root);
306 }
307
308 static void sock_tag_tree_erase(struct rb_root *st_to_free_tree)
309 {
310         struct rb_node *node;
311         struct sock_tag *st_entry;
312
313         node = rb_first(st_to_free_tree);
314         while (node) {
315                 st_entry = rb_entry(node, struct sock_tag, sock_node);
316                 node = rb_next(node);
317                 CT_DEBUG("qtaguid: %s(): "
318                          "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__,
319                          st_entry->sk,
320                          st_entry->tag,
321                          get_uid_from_tag(st_entry->tag));
322                 rb_erase(&st_entry->sock_node, st_to_free_tree);
323                 sockfd_put(st_entry->socket);
324                 kfree(st_entry);
325         }
326 }
327
328 static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root,
329                                                        const pid_t pid)
330 {
331         struct rb_node *node = root->rb_node;
332
333         while (node) {
334                 struct proc_qtu_data *data = rb_entry(node,
335                                                       struct proc_qtu_data,
336                                                       node);
337                 if (pid < data->pid)
338                         node = node->rb_left;
339                 else if (pid > data->pid)
340                         node = node->rb_right;
341                 else
342                         return data;
343         }
344         return NULL;
345 }
346
347 static void proc_qtu_data_tree_insert(struct proc_qtu_data *data,
348                                       struct rb_root *root)
349 {
350         struct rb_node **new = &(root->rb_node), *parent = NULL;
351
352         /* Figure out where to put new node */
353         while (*new) {
354                 struct proc_qtu_data *this = rb_entry(*new,
355                                                       struct proc_qtu_data,
356                                                       node);
357                 parent = *new;
358                 if (data->pid < this->pid)
359                         new = &((*new)->rb_left);
360                 else if (data->pid > this->pid)
361                         new = &((*new)->rb_right);
362                 else
363                         BUG();
364         }
365
366         /* Add new node and rebalance tree. */
367         rb_link_node(&data->node, parent, new);
368         rb_insert_color(&data->node, root);
369 }
370
371 static void uid_tag_data_tree_insert(struct uid_tag_data *data,
372                                      struct rb_root *root)
373 {
374         struct rb_node **new = &(root->rb_node), *parent = NULL;
375
376         /* Figure out where to put new node */
377         while (*new) {
378                 struct uid_tag_data *this = rb_entry(*new,
379                                                      struct uid_tag_data,
380                                                      node);
381                 parent = *new;
382                 if (data->uid < this->uid)
383                         new = &((*new)->rb_left);
384                 else if (data->uid > this->uid)
385                         new = &((*new)->rb_right);
386                 else
387                         BUG();
388         }
389
390         /* Add new node and rebalance tree. */
391         rb_link_node(&data->node, parent, new);
392         rb_insert_color(&data->node, root);
393 }
394
395 static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root,
396                                                      uid_t uid)
397 {
398         struct rb_node *node = root->rb_node;
399
400         while (node) {
401                 struct uid_tag_data *data = rb_entry(node,
402                                                      struct uid_tag_data,
403                                                      node);
404                 if (uid < data->uid)
405                         node = node->rb_left;
406                 else if (uid > data->uid)
407                         node = node->rb_right;
408                 else
409                         return data;
410         }
411         return NULL;
412 }
413
414 /*
415  * Allocates a new uid_tag_data struct if needed.
416  * Returns a pointer to the found or allocated uid_tag_data.
417  * Returns a PTR_ERR on failures, and lock is not held.
418  * If found is not NULL:
419  *   sets *found to true if not allocated.
420  *   sets *found to false if allocated.
421  */
422 struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res)
423 {
424         struct uid_tag_data *utd_entry;
425
426         /* Look for top level uid_tag_data for the UID */
427         utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid);
428         DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry);
429
430         if (found_res)
431                 *found_res = utd_entry;
432         if (utd_entry)
433                 return utd_entry;
434
435         utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC);
436         if (!utd_entry) {
437                 pr_err("qtaguid: get_uid_data(%u): "
438                        "tag data alloc failed\n", uid);
439                 return ERR_PTR(-ENOMEM);
440         }
441
442         utd_entry->uid = uid;
443         utd_entry->tag_ref_tree = RB_ROOT;
444         uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree);
445         DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry);
446         return utd_entry;
447 }
448
449 /* Never returns NULL. Either PTR_ERR or a valid ptr. */
450 static struct tag_ref *new_tag_ref(tag_t new_tag,
451                                    struct uid_tag_data *utd_entry)
452 {
453         struct tag_ref *tr_entry;
454         int res;
455
456         if (utd_entry->num_active_tags + 1 > max_sock_tags) {
457                 pr_info("qtaguid: new_tag_ref(0x%llx): "
458                         "tag ref alloc quota exceeded. max=%d\n",
459                         new_tag, max_sock_tags);
460                 res = -EMFILE;
461                 goto err_res;
462
463         }
464
465         tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC);
466         if (!tr_entry) {
467                 pr_err("qtaguid: new_tag_ref(0x%llx): "
468                        "tag ref alloc failed\n",
469                        new_tag);
470                 res = -ENOMEM;
471                 goto err_res;
472         }
473         tr_entry->tn.tag = new_tag;
474         /* tr_entry->num_sock_tags  handled by caller */
475         utd_entry->num_active_tags++;
476         tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree);
477         DR_DEBUG("qtaguid: new_tag_ref(0x%llx): "
478                  " inserted new tag ref %p\n",
479                  new_tag, tr_entry);
480         return tr_entry;
481
482 err_res:
483         return ERR_PTR(res);
484 }
485
486 static struct tag_ref *lookup_tag_ref(tag_t full_tag,
487                                       struct uid_tag_data **utd_res)
488 {
489         struct uid_tag_data *utd_entry;
490         struct tag_ref *tr_entry;
491         bool found_utd;
492         uid_t uid = get_uid_from_tag(full_tag);
493
494         DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n",
495                  full_tag, uid);
496
497         utd_entry = get_uid_data(uid, &found_utd);
498         if (IS_ERR_OR_NULL(utd_entry)) {
499                 if (utd_res)
500                         *utd_res = utd_entry;
501                 return NULL;
502         }
503
504         tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag);
505         if (utd_res)
506                 *utd_res = utd_entry;
507         DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n",
508                  full_tag, utd_entry, tr_entry);
509         return tr_entry;
510 }
511
512 /* Never returns NULL. Either PTR_ERR or a valid ptr. */
513 static struct tag_ref *get_tag_ref(tag_t full_tag,
514                                    struct uid_tag_data **utd_res)
515 {
516         struct uid_tag_data *utd_entry;
517         struct tag_ref *tr_entry;
518
519         DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n",
520                  full_tag);
521         spin_lock_bh(&uid_tag_data_tree_lock);
522         tr_entry = lookup_tag_ref(full_tag, &utd_entry);
523         BUG_ON(IS_ERR_OR_NULL(utd_entry));
524         if (!tr_entry)
525                 tr_entry = new_tag_ref(full_tag, utd_entry);
526
527         spin_unlock_bh(&uid_tag_data_tree_lock);
528         if (utd_res)
529                 *utd_res = utd_entry;
530         DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n",
531                  full_tag, utd_entry, tr_entry);
532         return tr_entry;
533 }
534
535 /* Checks and maybe frees the UID Tag Data entry */
536 static void put_utd_entry(struct uid_tag_data *utd_entry)
537 {
538         /* Are we done with the UID tag data entry? */
539         if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree) &&
540                 !utd_entry->num_pqd) {
541                 DR_DEBUG("qtaguid: %s(): "
542                          "erase utd_entry=%p uid=%u "
543                          "by pid=%u tgid=%u uid=%u\n", __func__,
544                          utd_entry, utd_entry->uid,
545                          current->pid, current->tgid, current_fsuid());
546                 BUG_ON(utd_entry->num_active_tags);
547                 rb_erase(&utd_entry->node, &uid_tag_data_tree);
548                 kfree(utd_entry);
549         } else {
550                 DR_DEBUG("qtaguid: %s(): "
551                          "utd_entry=%p still has %d tags %d proc_qtu_data\n",
552                          __func__, utd_entry, utd_entry->num_active_tags,
553                          utd_entry->num_pqd);
554                 BUG_ON(!(utd_entry->num_active_tags ||
555                          utd_entry->num_pqd));
556         }
557 }
558
559 /*
560  * If no sock_tags are using this tag_ref,
561  * decrements refcount of utd_entry, removes tr_entry
562  * from utd_entry->tag_ref_tree and frees.
563  */
564 static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry,
565                                         struct uid_tag_data *utd_entry)
566 {
567         DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__,
568                  tr_entry, tr_entry->tn.tag,
569                  get_uid_from_tag(tr_entry->tn.tag));
570         if (!tr_entry->num_sock_tags) {
571                 BUG_ON(!utd_entry->num_active_tags);
572                 utd_entry->num_active_tags--;
573                 rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree);
574                 DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry);
575                 kfree(tr_entry);
576         }
577 }
578
579 static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry)
580 {
581         struct rb_node *node;
582         struct tag_ref *tr_entry;
583         tag_t acct_tag;
584
585         DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__,
586                  full_tag, get_uid_from_tag(full_tag));
587         acct_tag = get_atag_from_tag(full_tag);
588         node = rb_first(&utd_entry->tag_ref_tree);
589         while (node) {
590                 tr_entry = rb_entry(node, struct tag_ref, tn.node);
591                 node = rb_next(node);
592                 if (!acct_tag || tr_entry->tn.tag == full_tag)
593                         free_tag_ref_from_utd_entry(tr_entry, utd_entry);
594         }
595 }
596
597 static int read_proc_u64(struct file *file, char __user *buf,
598                          size_t size, loff_t *ppos)
599 {
600         uint64_t *valuep = PDE_DATA(file_inode(file));
601         char tmp[24];
602         size_t tmp_size;
603
604         tmp_size = scnprintf(tmp, sizeof(tmp), "%llu\n", *valuep);
605         return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size);
606 }
607
608 static int read_proc_bool(struct file *file, char __user *buf,
609                           size_t size, loff_t *ppos)
610 {
611         bool *valuep = PDE_DATA(file_inode(file));
612         char tmp[24];
613         size_t tmp_size;
614
615         tmp_size = scnprintf(tmp, sizeof(tmp), "%u\n", *valuep);
616         return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size);
617 }
618
619 static int get_active_counter_set(tag_t tag)
620 {
621         int active_set = 0;
622         struct tag_counter_set *tcs;
623
624         MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)"
625                  " (uid=%u)\n",
626                  tag, get_uid_from_tag(tag));
627         /* For now we only handle UID tags for active sets */
628         tag = get_utag_from_tag(tag);
629         spin_lock_bh(&tag_counter_set_list_lock);
630         tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
631         if (tcs)
632                 active_set = tcs->active_set;
633         spin_unlock_bh(&tag_counter_set_list_lock);
634         return active_set;
635 }
636
637 /*
638  * Find the entry for tracking the specified interface.
639  * Caller must hold iface_stat_list_lock
640  */
641 static struct iface_stat *get_iface_entry(const char *ifname)
642 {
643         struct iface_stat *iface_entry;
644
645         /* Find the entry for tracking the specified tag within the interface */
646         if (ifname == NULL) {
647                 pr_info("qtaguid: iface_stat: get() NULL device name\n");
648                 return NULL;
649         }
650
651         /* Iterate over interfaces */
652         list_for_each_entry(iface_entry, &iface_stat_list, list) {
653                 if (!strcmp(ifname, iface_entry->ifname))
654                         goto done;
655         }
656         iface_entry = NULL;
657 done:
658         return iface_entry;
659 }
660
/* This is for fmt2 only */
static void pp_iface_stat_header(struct seq_file *m)
{
	/* Column order must match pp_iface_stat_line(). */
	static const char header[] =
		"ifname "
		"total_skb_rx_bytes total_skb_rx_packets "
		"total_skb_tx_bytes total_skb_tx_packets "
		"rx_tcp_bytes rx_tcp_packets "
		"rx_udp_bytes rx_udp_packets "
		"rx_other_bytes rx_other_packets "
		"tx_tcp_bytes tx_tcp_packets "
		"tx_udp_bytes tx_udp_packets "
		"tx_other_bytes tx_other_packets\n";

	seq_puts(m, header);
}
676
/*
 * Print one fmt=2 line for an interface: skb-based totals broken out by
 * direction and protocol.  Column order must match pp_iface_stat_header().
 */
static void pp_iface_stat_line(struct seq_file *m,
                               struct iface_stat *iface_entry)
{
        struct data_counters *cnts;
        int cnt_set = 0;   /* We only use one set for the device */
        cnts = &iface_entry->totals_via_skb;
        seq_printf(m, "%s %llu %llu %llu %llu %llu %llu %llu %llu "
                   "%llu %llu %llu %llu %llu %llu %llu %llu\n",
                   iface_entry->ifname,
                   dc_sum_bytes(cnts, cnt_set, IFS_RX),
                   dc_sum_packets(cnts, cnt_set, IFS_RX),
                   dc_sum_bytes(cnts, cnt_set, IFS_TX),
                   dc_sum_packets(cnts, cnt_set, IFS_TX),
                   cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
                   cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
                   cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
                   cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
                   cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
                   cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
                   cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
                   cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
                   cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
                   cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
                   cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
                   cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
}
703
/* seq_file private data: which output format (1 or 2) this proc file uses. */
struct proc_iface_stat_fmt_info {
        int fmt;
};
707
/*
 * seq_file ->start: take the interface list lock for the whole iteration
 * (released in iface_stat_fmt_proc_stop, which seq_file calls even when
 * we return NULL here), emit the header on the first pass for fmt=2,
 * and position the iterator.
 */
static void *iface_stat_fmt_proc_start(struct seq_file *m, loff_t *pos)
{
        struct proc_iface_stat_fmt_info *p = m->private;
        loff_t n = *pos;

        /*
         * This lock will prevent iface_stat_update() from changing active,
         * and in turn prevent an interface from unregistering itself.
         */
        spin_lock_bh(&iface_stat_list_lock);

        /* In passive mode the file reads as empty. */
        if (unlikely(module_passive))
                return NULL;

        if (!n && p->fmt == 2)
                pp_iface_stat_header(m);

        return seq_list_start(&iface_stat_list, n);
}
727
/* seq_file ->next: advance to the next interface in iface_stat_list. */
static void *iface_stat_fmt_proc_next(struct seq_file *m, void *p, loff_t *pos)
{
        return seq_list_next(p, &iface_stat_list, pos);
}
732
/* seq_file ->stop: drop the lock taken in iface_stat_fmt_proc_start(). */
static void iface_stat_fmt_proc_stop(struct seq_file *m, void *p)
{
        spin_unlock_bh(&iface_stat_list_lock);
}
737
/*
 * seq_file ->show: print one interface's stats in the format selected by
 * m->private->fmt.  fmt=1 combines dev-based totals with the live
 * dev_get_stats() numbers; fmt=2 delegates to pp_iface_stat_line().
 */
static int iface_stat_fmt_proc_show(struct seq_file *m, void *v)
{
        struct proc_iface_stat_fmt_info *p = m->private;
        struct iface_stat *iface_entry;
        struct rtnl_link_stats64 dev_stats, *stats;
        struct rtnl_link_stats64 no_dev_stats = {0};


        CT_DEBUG("qtaguid:proc iface_stat_fmt pid=%u tgid=%u uid=%u\n",
                 current->pid, current->tgid, current_fsuid());

        iface_entry = list_entry(v, struct iface_stat, list);

        /* Inactive interfaces have no net_dev; report zeros instead. */
        if (iface_entry->active) {
                stats = dev_get_stats(iface_entry->net_dev,
                                      &dev_stats);
        } else {
                stats = &no_dev_stats;
        }
        /*
         * If the meaning of the data changes, then update the fmtX
         * string.
         */
        if (p->fmt == 1) {
                seq_printf(m, "%s %d %llu %llu %llu %llu %llu %llu %llu %llu\n",
                           iface_entry->ifname,
                           iface_entry->active,
                           iface_entry->totals_via_dev[IFS_RX].bytes,
                           iface_entry->totals_via_dev[IFS_RX].packets,
                           iface_entry->totals_via_dev[IFS_TX].bytes,
                           iface_entry->totals_via_dev[IFS_TX].packets,
                           stats->rx_bytes, stats->rx_packets,
                           stats->tx_bytes, stats->tx_packets
                           );
        } else {
                pp_iface_stat_line(m, iface_entry);
        }
        return 0;
}
777
/* fops for the read-only per-interface u64/bool proc entries; the value
 * to print is passed via proc_create_data()'s data pointer.
 */
static const struct file_operations read_u64_fops = {
        .read           = read_proc_u64,
        .llseek         = default_llseek,
};

static const struct file_operations read_bool_fops = {
        .read           = read_proc_bool,
        .llseek         = default_llseek,
};
787
/*
 * Deferred work: create the /proc/net/xt_qtaguid/iface_stat/<dev>/ directory
 * and its tx/rx byte/packet and "active" files for a newly seen interface.
 * Runs from a workqueue so proc entries aren't created in atomic context.
 * Frees the work item (isw) on every path; the iface_stat itself lives on.
 */
static void iface_create_proc_worker(struct work_struct *work)
{
        struct proc_dir_entry *proc_entry;
        struct iface_stat_work *isw = container_of(work, struct iface_stat_work,
                                                   iface_work);
        struct iface_stat *new_iface  = isw->iface_entry;

        /* iface_entries are not deleted, so safe to manipulate. */
        proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir);
        if (IS_ERR_OR_NULL(proc_entry)) {
                pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n");
                kfree(isw);
                return;
        }

        new_iface->proc_ptr = proc_entry;

        /* Each file's data pointer targets the counter it should print. */
        proc_create_data("tx_bytes", proc_iface_perms, proc_entry,
                         &read_u64_fops,
                         &new_iface->totals_via_dev[IFS_TX].bytes);
        proc_create_data("rx_bytes", proc_iface_perms, proc_entry,
                         &read_u64_fops,
                         &new_iface->totals_via_dev[IFS_RX].bytes);
        proc_create_data("tx_packets", proc_iface_perms, proc_entry,
                         &read_u64_fops,
                         &new_iface->totals_via_dev[IFS_TX].packets);
        proc_create_data("rx_packets", proc_iface_perms, proc_entry,
                         &read_u64_fops,
                         &new_iface->totals_via_dev[IFS_RX].packets);
        proc_create_data("active", proc_iface_perms, proc_entry,
                         &read_bool_fops, &new_iface->active);

        IF_DEBUG("qtaguid: iface_stat: create_proc(): done "
                 "entry=%p dev=%s\n", new_iface, new_iface->ifname);
        kfree(isw);
}
824
/*
 * Will set the entry's active state, and
 * update the net_dev accordingly also.
 *
 * NOTE(review): net_dev must be valid even on deactivate — the debug
 * print below dereferences net_dev->pcpu_refcnt in both branches.
 * The active/net_dev assignment order differs per branch (pointer set
 * before flag on activate, flag cleared before pointer on deactivate);
 * preserved as-is in case lockless readers depend on it — confirm.
 */
static void _iface_stat_set_active(struct iface_stat *entry,
				   struct net_device *net_dev,
				   bool activate)
{
	if (activate) {
		entry->net_dev = net_dev;
		entry->active = true;
		IF_DEBUG("qtaguid: %s(%s): "
			 "enable tracking. rfcnt=%d\n", __func__,
			 entry->ifname,
			 __this_cpu_read(*net_dev->pcpu_refcnt));
	} else {
		entry->active = false;
		entry->net_dev = NULL;
		IF_DEBUG("qtaguid: %s(%s): "
			 "disable tracking. rfcnt=%d\n", __func__,
			 entry->ifname,
			 __this_cpu_read(*net_dev->pcpu_refcnt));

	}
}
850
851 /* Caller must hold iface_stat_list_lock */
852 static struct iface_stat *iface_alloc(struct net_device *net_dev)
853 {
854         struct iface_stat *new_iface;
855         struct iface_stat_work *isw;
856
857         new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC);
858         if (new_iface == NULL) {
859                 pr_err("qtaguid: iface_stat: create(%s): "
860                        "iface_stat alloc failed\n", net_dev->name);
861                 return NULL;
862         }
863         new_iface->ifname = kstrdup(net_dev->name, GFP_ATOMIC);
864         if (new_iface->ifname == NULL) {
865                 pr_err("qtaguid: iface_stat: create(%s): "
866                        "ifname alloc failed\n", net_dev->name);
867                 kfree(new_iface);
868                 return NULL;
869         }
870         spin_lock_init(&new_iface->tag_stat_list_lock);
871         new_iface->tag_stat_tree = RB_ROOT;
872         _iface_stat_set_active(new_iface, net_dev, true);
873
874         /*
875          * ipv6 notifier chains are atomic :(
876          * No create_proc_read_entry() for you!
877          */
878         isw = kmalloc(sizeof(*isw), GFP_ATOMIC);
879         if (!isw) {
880                 pr_err("qtaguid: iface_stat: create(%s): "
881                        "work alloc failed\n", new_iface->ifname);
882                 _iface_stat_set_active(new_iface, net_dev, false);
883                 kfree(new_iface->ifname);
884                 kfree(new_iface);
885                 return NULL;
886         }
887         isw->iface_entry = new_iface;
888         INIT_WORK(&isw->iface_work, iface_create_proc_worker);
889         schedule_work(&isw->iface_work);
890         list_add(&new_iface->list, &iface_stat_list);
891         return new_iface;
892 }
893
/*
 * Detect an unexpected device-stats rewind (rx or tx byte counters lower
 * than the values stashed in last_known[]) and, if the interface is
 * active with a valid stash, fold the stashed last_known totals into
 * totals_via_dev[] so accumulated usage is not lost.
 */
static void iface_check_stats_reset_and_adjust(struct net_device *net_dev,
					       struct iface_stat *iface)
{
	struct rtnl_link_stats64 dev_stats, *stats;
	bool stats_rewound;

	stats = dev_get_stats(net_dev, &dev_stats);
	/* No empty packets */
	stats_rewound =
		(stats->rx_bytes < iface->last_known[IFS_RX].bytes)
		|| (stats->tx_bytes < iface->last_known[IFS_TX].bytes);

	IF_DEBUG("qtaguid: %s(%s): iface=%p netdev=%p "
		 "bytes rx/tx=%llu/%llu "
		 "active=%d last_known=%d "
		 "stats_rewound=%d\n", __func__,
		 net_dev ? net_dev->name : "?",
		 iface, net_dev,
		 stats->rx_bytes, stats->tx_bytes,
		 iface->active, iface->last_known_valid, stats_rewound);

	if (iface->active && iface->last_known_valid && stats_rewound) {
		pr_warn_once("qtaguid: iface_stat: %s(%s): "
			     "iface reset its stats unexpectedly\n", __func__,
			     net_dev->name);

		/* Credit the stashed snapshot; the device restarts from 0. */
		iface->totals_via_dev[IFS_TX].bytes +=
			iface->last_known[IFS_TX].bytes;
		iface->totals_via_dev[IFS_TX].packets +=
			iface->last_known[IFS_TX].packets;
		iface->totals_via_dev[IFS_RX].bytes +=
			iface->last_known[IFS_RX].bytes;
		iface->totals_via_dev[IFS_RX].packets +=
			iface->last_known[IFS_RX].packets;
		/* Stash consumed; a fresh one is taken on the next DOWN. */
		iface->last_known_valid = false;
		IF_DEBUG("qtaguid: %s(%s): iface=%p "
			 "used last known bytes rx/tx=%llu/%llu\n", __func__,
			 iface->ifname, iface, iface->last_known[IFS_RX].bytes,
			 iface->last_known[IFS_TX].bytes);
	}
}
935
/*
 * Create a new entry for tracking the specified interface.
 * Do nothing if the entry already exists.
 * Called when an interface is configured with a valid IP address.
 *
 * If @ifa is NULL (NETDEV_UP path), walks the device's in_dev address
 * list for an entry whose label matches the device name; the in_dev
 * reference taken by in_dev_get() is dropped at done_put.
 */
static void iface_stat_create(struct net_device *net_dev,
			      struct in_ifaddr *ifa)
{
	struct in_device *in_dev = NULL;
	const char *ifname;
	struct iface_stat *entry;
	__be32 ipaddr = 0;
	struct iface_stat *new_iface;

	IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n",
		 net_dev ? net_dev->name : "?",
		 ifa, net_dev);
	if (!net_dev) {
		pr_err("qtaguid: iface_stat: create(): no net dev\n");
		return;
	}

	ifname = net_dev->name;
	if (!ifa) {
		/* No address supplied: find one attached to this device. */
		in_dev = in_dev_get(net_dev);
		if (!in_dev) {
			pr_err("qtaguid: iface_stat: create(%s): no inet dev\n",
			       ifname);
			return;
		}
		IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n",
			 ifname, in_dev);
		for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
			IF_DEBUG("qtaguid: iface_stat: create(%s): "
				 "ifa=%p ifa_label=%s\n",
				 ifname, ifa,
				 ifa->ifa_label ? ifa->ifa_label : "(null)");
			if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label))
				break;
		}
	}

	if (!ifa) {
		IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n",
			 ifname);
		goto done_put;
	}
	ipaddr = ifa->ifa_local;

	spin_lock_bh(&iface_stat_list_lock);
	entry = get_iface_entry(ifname);
	if (entry != NULL) {
		/* Already tracked: reconcile stats and (re)activate. */
		IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n",
			 ifname, entry);
		iface_check_stats_reset_and_adjust(net_dev, entry);
		_iface_stat_set_active(entry, net_dev, true);
		IF_DEBUG("qtaguid: %s(%s): "
			 "tracking now %d on ip=%pI4\n", __func__,
			 entry->ifname, true, &ipaddr);
		goto done_unlock_put;
	}

	new_iface = iface_alloc(net_dev);
	IF_DEBUG("qtaguid: iface_stat: create(%s): done "
		 "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr);
done_unlock_put:
	spin_unlock_bh(&iface_stat_list_lock);
done_put:
	/* Drop the reference only if we took one above. */
	if (in_dev)
		in_dev_put(in_dev);
}
1007
/*
 * IPv6 counterpart of iface_stat_create(): create or reactivate the
 * iface_stat entry when an IPv6 address event names this device.
 *
 * NOTE(review): this grabs the device's IPv4 in_device via in_dev_get()
 * even though this is the v6 path — apparently only to hold/drop a
 * reference around the work, as in_dev is never read; confirm intent.
 */
static void iface_stat_create_ipv6(struct net_device *net_dev,
				   struct inet6_ifaddr *ifa)
{
	struct in_device *in_dev;
	const char *ifname;
	struct iface_stat *entry;
	struct iface_stat *new_iface;
	int addr_type;

	IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n",
		 ifa, net_dev, net_dev ? net_dev->name : "");
	if (!net_dev) {
		pr_err("qtaguid: iface_stat: create6(): no net dev!\n");
		return;
	}
	ifname = net_dev->name;

	in_dev = in_dev_get(net_dev);
	if (!in_dev) {
		pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n",
		       ifname);
		return;
	}

	IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n",
		 ifname, in_dev);

	if (!ifa) {
		IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n",
			 ifname);
		goto done_put;
	}
	/* NOTE(review): addr_type is computed but never used — confirm. */
	addr_type = ipv6_addr_type(&ifa->addr);

	spin_lock_bh(&iface_stat_list_lock);
	entry = get_iface_entry(ifname);
	if (entry != NULL) {
		/* Already tracked: reconcile stats and (re)activate. */
		IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
			 ifname, entry);
		iface_check_stats_reset_and_adjust(net_dev, entry);
		_iface_stat_set_active(entry, net_dev, true);
		IF_DEBUG("qtaguid: %s(%s): "
			 "tracking now %d on ip=%pI6c\n", __func__,
			 entry->ifname, true, &ifa->addr);
		goto done_unlock_put;
	}

	new_iface = iface_alloc(net_dev);
	IF_DEBUG("qtaguid: iface_stat: create6(%s): done "
		 "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr);

done_unlock_put:
	spin_unlock_bh(&iface_stat_list_lock);
done_put:
	in_dev_put(in_dev);
}
1064
1065 static struct sock_tag *get_sock_stat_nl(const struct sock *sk)
1066 {
1067         MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk);
1068         return sock_tag_tree_search(&sock_tag_tree, sk);
1069 }
1070
1071 static struct sock_tag *get_sock_stat(const struct sock *sk)
1072 {
1073         struct sock_tag *sock_tag_entry;
1074         MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk);
1075         if (!sk)
1076                 return NULL;
1077         spin_lock_bh(&sock_tag_list_lock);
1078         sock_tag_entry = get_sock_stat_nl(sk);
1079         spin_unlock_bh(&sock_tag_list_lock);
1080         return sock_tag_entry;
1081 }
1082
1083 static int ipx_proto(const struct sk_buff *skb,
1084                      struct xt_action_param *par)
1085 {
1086         int thoff = 0, tproto;
1087
1088         switch (par->family) {
1089         case NFPROTO_IPV6:
1090                 tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
1091                 if (tproto < 0)
1092                         MT_DEBUG("%s(): transport header not found in ipv6"
1093                                  " skb=%p\n", __func__, skb);
1094                 break;
1095         case NFPROTO_IPV4:
1096                 tproto = ip_hdr(skb)->protocol;
1097                 break;
1098         default:
1099                 tproto = IPPROTO_RAW;
1100         }
1101         return tproto;
1102 }
1103
1104 static void
1105 data_counters_update(struct data_counters *dc, int set,
1106                      enum ifs_tx_rx direction, int proto, int bytes)
1107 {
1108         switch (proto) {
1109         case IPPROTO_TCP:
1110                 dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1);
1111                 break;
1112         case IPPROTO_UDP:
1113                 dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1);
1114                 break;
1115         case IPPROTO_IP:
1116         default:
1117                 dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes,
1118                                     1);
1119                 break;
1120         }
1121 }
1122
/*
 * Update stats for the specified interface. Do nothing if the entry
 * does not exist (when a device was never configured with an IP address).
 * Called when an device is being unregistered.
 *
 * With stash_only (NETDEV_DOWN) the current device counters are only
 * snapshotted into last_known[]; otherwise (NETDEV_UNREGISTER) they are
 * folded into totals_via_dev[] and the entry is deactivated.
 */
static void iface_stat_update(struct net_device *net_dev, bool stash_only)
{
	struct rtnl_link_stats64 dev_stats, *stats;
	struct iface_stat *entry;

	stats = dev_get_stats(net_dev, &dev_stats);
	spin_lock_bh(&iface_stat_list_lock);
	entry = get_iface_entry(net_dev->name);
	if (entry == NULL) {
		IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n",
			 net_dev->name);
		spin_unlock_bh(&iface_stat_list_lock);
		return;
	}

	IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
		 net_dev->name, entry);
	if (!entry->active) {
		/* A prior DOWN/UNREGISTER already folded the counters. */
		IF_DEBUG("qtaguid: %s(%s): already disabled\n", __func__,
			 net_dev->name);
		spin_unlock_bh(&iface_stat_list_lock);
		return;
	}

	if (stash_only) {
		/* Snapshot only; totals get adjusted later if stats rewind. */
		entry->last_known[IFS_TX].bytes = stats->tx_bytes;
		entry->last_known[IFS_TX].packets = stats->tx_packets;
		entry->last_known[IFS_RX].bytes = stats->rx_bytes;
		entry->last_known[IFS_RX].packets = stats->rx_packets;
		entry->last_known_valid = true;
		IF_DEBUG("qtaguid: %s(%s): "
			 "dev stats stashed rx/tx=%llu/%llu\n", __func__,
			 net_dev->name, stats->rx_bytes, stats->tx_bytes);
		spin_unlock_bh(&iface_stat_list_lock);
		return;
	}
	entry->totals_via_dev[IFS_TX].bytes += stats->tx_bytes;
	entry->totals_via_dev[IFS_TX].packets += stats->tx_packets;
	entry->totals_via_dev[IFS_RX].bytes += stats->rx_bytes;
	entry->totals_via_dev[IFS_RX].packets += stats->rx_packets;
	/* We don't need the last_known[] anymore */
	entry->last_known_valid = false;
	_iface_stat_set_active(entry, net_dev, false);
	IF_DEBUG("qtaguid: %s(%s): "
		 "disable tracking. rx/tx=%llu/%llu\n", __func__,
		 net_dev->name, stats->rx_bytes, stats->tx_bytes);
	spin_unlock_bh(&iface_stat_list_lock);
}
1176
/*
 * Update stats for the specified interface from the skb.
 * Do nothing if the entry
 * does not exist (when a device was never configured with an IP address).
 * Called on each sk.
 *
 * Direction is inferred from par->in (set => RX, else TX); the device is
 * taken from skb->dev when present, falling back to par->in/par->out.
 */
static void iface_stat_update_from_skb(const struct sk_buff *skb,
				       struct xt_action_param *par)
{
	struct iface_stat *entry;
	const struct net_device *el_dev;
	enum ifs_tx_rx direction = par->in ? IFS_RX : IFS_TX;
	int bytes = skb->len;
	int proto;

	if (!skb->dev) {
		MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
		el_dev = par->in ? : par->out;
	} else {
		const struct net_device *other_dev;
		el_dev = skb->dev;
		other_dev = par->in ? : par->out;
		/* Mismatch is only logged; skb->dev wins. */
		if (el_dev != other_dev) {
			MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
				 "par->(in/out)=%p %s\n",
				 par->hooknum, el_dev, el_dev->name, other_dev,
				 other_dev->name);
		}
	}

	/* A missing device or name is treated as fatal; BUG() never returns,
	 * so proto is always assigned before use below. */
	if (unlikely(!el_dev)) {
		pr_err_ratelimited("qtaguid[%d]: %s(): no par->in/out?!!\n",
				   par->hooknum, __func__);
		BUG();
	} else if (unlikely(!el_dev->name)) {
		pr_err_ratelimited("qtaguid[%d]: %s(): no dev->name?!!\n",
				   par->hooknum, __func__);
		BUG();
	} else {
		proto = ipx_proto(skb, par);
		MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
			 par->hooknum, el_dev->name, el_dev->type,
			 par->family, proto);
	}

	spin_lock_bh(&iface_stat_list_lock);
	entry = get_iface_entry(el_dev->name);
	if (entry == NULL) {
		IF_DEBUG("qtaguid: iface_stat: %s(%s): not tracked\n",
			 __func__, el_dev->name);
		spin_unlock_bh(&iface_stat_list_lock);
		return;
	}

	IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
		 el_dev->name, entry);

	/* Per-skb totals use counter set 0; updated under the list lock. */
	data_counters_update(&entry->totals_via_skb, 0, direction, proto,
			     bytes);
	spin_unlock_bh(&iface_stat_list_lock);
}
1238
1239 static void tag_stat_update(struct tag_stat *tag_entry,
1240                         enum ifs_tx_rx direction, int proto, int bytes)
1241 {
1242         int active_set;
1243         active_set = get_active_counter_set(tag_entry->tn.tag);
1244         MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d "
1245                  "dir=%d proto=%d bytes=%d)\n",
1246                  tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag),
1247                  active_set, direction, proto, bytes);
1248         data_counters_update(&tag_entry->counters, active_set, direction,
1249                              proto, bytes);
1250         if (tag_entry->parent_counters)
1251                 data_counters_update(tag_entry->parent_counters, active_set,
1252                                      direction, proto, bytes);
1253 }
1254
1255 /*
1256  * Create a new entry for tracking the specified {acct_tag,uid_tag} within
1257  * the interface.
1258  * iface_entry->tag_stat_list_lock should be held.
1259  */
1260 static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry,
1261                                            tag_t tag)
1262 {
1263         struct tag_stat *new_tag_stat_entry = NULL;
1264         IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx"
1265                  " (uid=%u)\n", __func__,
1266                  iface_entry, tag, get_uid_from_tag(tag));
1267         new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC);
1268         if (!new_tag_stat_entry) {
1269                 pr_err("qtaguid: iface_stat: tag stat alloc failed\n");
1270                 goto done;
1271         }
1272         new_tag_stat_entry->tn.tag = tag;
1273         tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree);
1274 done:
1275         return new_tag_stat_entry;
1276 }
1277
1278 static void if_tag_stat_update(const char *ifname, uid_t uid,
1279                                const struct sock *sk, enum ifs_tx_rx direction,
1280                                int proto, int bytes)
1281 {
1282         struct tag_stat *tag_stat_entry;
1283         tag_t tag, acct_tag;
1284         tag_t uid_tag;
1285         struct data_counters *uid_tag_counters;
1286         struct sock_tag *sock_tag_entry;
1287         struct iface_stat *iface_entry;
1288         struct tag_stat *new_tag_stat = NULL;
1289         MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s "
1290                 "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n",
1291                  ifname, uid, sk, direction, proto, bytes);
1292
1293
1294         iface_entry = get_iface_entry(ifname);
1295         if (!iface_entry) {
1296                 pr_err_ratelimited("qtaguid: iface_stat: stat_update() "
1297                                    "%s not found\n", ifname);
1298                 return;
1299         }
1300         /* It is ok to process data when an iface_entry is inactive */
1301
1302         MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n",
1303                  ifname, iface_entry);
1304
1305         /*
1306          * Look for a tagged sock.
1307          * It will have an acct_uid.
1308          */
1309         sock_tag_entry = get_sock_stat(sk);
1310         if (sock_tag_entry) {
1311                 tag = sock_tag_entry->tag;
1312                 acct_tag = get_atag_from_tag(tag);
1313                 uid_tag = get_utag_from_tag(tag);
1314         } else {
1315                 acct_tag = make_atag_from_value(0);
1316                 tag = combine_atag_with_uid(acct_tag, uid);
1317                 uid_tag = make_tag_from_uid(uid);
1318         }
1319         MT_DEBUG("qtaguid: iface_stat: stat_update(): "
1320                  " looking for tag=0x%llx (uid=%u) in ife=%p\n",
1321                  tag, get_uid_from_tag(tag), iface_entry);
1322         /* Loop over tag list under this interface for {acct_tag,uid_tag} */
1323         spin_lock_bh(&iface_entry->tag_stat_list_lock);
1324
1325         tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
1326                                               tag);
1327         if (tag_stat_entry) {
1328                 /*
1329                  * Updating the {acct_tag, uid_tag} entry handles both stats:
1330                  * {0, uid_tag} will also get updated.
1331                  */
1332                 tag_stat_update(tag_stat_entry, direction, proto, bytes);
1333                 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1334                 return;
1335         }
1336
1337         /* Loop over tag list under this interface for {0,uid_tag} */
1338         tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
1339                                               uid_tag);
1340         if (!tag_stat_entry) {
1341                 /* Here: the base uid_tag did not exist */
1342                 /*
1343                  * No parent counters. So
1344                  *  - No {0, uid_tag} stats and no {acc_tag, uid_tag} stats.
1345                  */
1346                 new_tag_stat = create_if_tag_stat(iface_entry, uid_tag);
1347                 if (!new_tag_stat)
1348                         goto unlock;
1349                 uid_tag_counters = &new_tag_stat->counters;
1350         } else {
1351                 uid_tag_counters = &tag_stat_entry->counters;
1352         }
1353
1354         if (acct_tag) {
1355                 /* Create the child {acct_tag, uid_tag} and hook up parent. */
1356                 new_tag_stat = create_if_tag_stat(iface_entry, tag);
1357                 if (!new_tag_stat)
1358                         goto unlock;
1359                 new_tag_stat->parent_counters = uid_tag_counters;
1360         } else {
1361                 /*
1362                  * For new_tag_stat to be still NULL here would require:
1363                  *  {0, uid_tag} exists
1364                  *  and {acct_tag, uid_tag} doesn't exist
1365                  *  AND acct_tag == 0.
1366                  * Impossible. This reassures us that new_tag_stat
1367                  * below will always be assigned.
1368                  */
1369                 BUG_ON(!new_tag_stat);
1370         }
1371         tag_stat_update(new_tag_stat, direction, proto, bytes);
1372 unlock:
1373         spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1374 }
1375
1376 static int iface_netdev_event_handler(struct notifier_block *nb,
1377                                       unsigned long event, void *ptr) {
1378         struct net_device *dev = ptr;
1379
1380         if (unlikely(module_passive))
1381                 return NOTIFY_DONE;
1382
1383         IF_DEBUG("qtaguid: iface_stat: netdev_event(): "
1384                  "ev=0x%lx/%s netdev=%p->name=%s\n",
1385                  event, netdev_evt_str(event), dev, dev ? dev->name : "");
1386
1387         switch (event) {
1388         case NETDEV_UP:
1389                 iface_stat_create(dev, NULL);
1390                 atomic64_inc(&qtu_events.iface_events);
1391                 break;
1392         case NETDEV_DOWN:
1393         case NETDEV_UNREGISTER:
1394                 iface_stat_update(dev, event == NETDEV_DOWN);
1395                 atomic64_inc(&qtu_events.iface_events);
1396                 break;
1397         }
1398         return NOTIFY_DONE;
1399 }
1400
1401 static int iface_inet6addr_event_handler(struct notifier_block *nb,
1402                                          unsigned long event, void *ptr)
1403 {
1404         struct inet6_ifaddr *ifa = ptr;
1405         struct net_device *dev;
1406
1407         if (unlikely(module_passive))
1408                 return NOTIFY_DONE;
1409
1410         IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): "
1411                  "ev=0x%lx/%s ifa=%p\n",
1412                  event, netdev_evt_str(event), ifa);
1413
1414         switch (event) {
1415         case NETDEV_UP:
1416                 BUG_ON(!ifa || !ifa->idev);
1417                 dev = (struct net_device *)ifa->idev->dev;
1418                 iface_stat_create_ipv6(dev, ifa);
1419                 atomic64_inc(&qtu_events.iface_events);
1420                 break;
1421         case NETDEV_DOWN:
1422         case NETDEV_UNREGISTER:
1423                 BUG_ON(!ifa || !ifa->idev);
1424                 dev = (struct net_device *)ifa->idev->dev;
1425                 iface_stat_update(dev, event == NETDEV_DOWN);
1426                 atomic64_inc(&qtu_events.iface_events);
1427                 break;
1428         }
1429         return NOTIFY_DONE;
1430 }
1431
1432 static int iface_inetaddr_event_handler(struct notifier_block *nb,
1433                                         unsigned long event, void *ptr)
1434 {
1435         struct in_ifaddr *ifa = ptr;
1436         struct net_device *dev;
1437
1438         if (unlikely(module_passive))
1439                 return NOTIFY_DONE;
1440
1441         IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): "
1442                  "ev=0x%lx/%s ifa=%p\n",
1443                  event, netdev_evt_str(event), ifa);
1444
1445         switch (event) {
1446         case NETDEV_UP:
1447                 BUG_ON(!ifa || !ifa->ifa_dev);
1448                 dev = ifa->ifa_dev->dev;
1449                 iface_stat_create(dev, ifa);
1450                 atomic64_inc(&qtu_events.iface_events);
1451                 break;
1452         case NETDEV_DOWN:
1453         case NETDEV_UNREGISTER:
1454                 BUG_ON(!ifa || !ifa->ifa_dev);
1455                 dev = ifa->ifa_dev->dev;
1456                 iface_stat_update(dev, event == NETDEV_DOWN);
1457                 atomic64_inc(&qtu_events.iface_events);
1458                 break;
1459         }
1460         return NOTIFY_DONE;
1461 }
1462
/* Notifier blocks registered by iface_stat_init() (see below). */
static struct notifier_block iface_netdev_notifier_blk = {
	.notifier_call = iface_netdev_event_handler,
};

static struct notifier_block iface_inetaddr_notifier_blk = {
	.notifier_call = iface_inetaddr_event_handler,
};

static struct notifier_block iface_inet6addr_notifier_blk = {
	.notifier_call = iface_inet6addr_event_handler,
};
1474
/* seq_file iterator for the iface_stat_all/iface_stat_fmt proc files. */
static const struct seq_operations iface_stat_fmt_proc_seq_ops = {
	.start	= iface_stat_fmt_proc_start,
	.next	= iface_stat_fmt_proc_next,
	.stop	= iface_stat_fmt_proc_stop,
	.show	= iface_stat_fmt_proc_show,
};
1481
1482 static int proc_iface_stat_fmt_open(struct inode *inode, struct file *file)
1483 {
1484         struct proc_iface_stat_fmt_info *s;
1485
1486         s = __seq_open_private(file, &iface_stat_fmt_proc_seq_ops,
1487                         sizeof(struct proc_iface_stat_fmt_info));
1488         if (!s)
1489                 return -ENOMEM;
1490
1491         s->fmt = (int)PDE_DATA(inode);
1492         return 0;
1493 }
1494
/* seq_release_private pairs with __seq_open_private in ..._fmt_open. */
static const struct file_operations proc_iface_stat_fmt_fops = {
	.open		= proc_iface_stat_fmt_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_private,
};
1501
/*
 * Module-init helper: create the iface_stat proc directory and the two
 * stat files (fmt=1 "old" and fmt=2 "all" layouts via PDE data), then
 * register the netdev, IPv4 and IPv6 address notifiers.
 *
 * On failure, unwinds in strict reverse order of creation through the
 * label ladder below. Returns 0 on success, -1 or the notifier
 * registration error on failure.
 */
static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
{
	int err;

	iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir);
	if (!iface_stat_procdir) {
		pr_err("qtaguid: iface_stat: init failed to create proc entry\n");
		err = -1;
		goto err;
	}

	iface_stat_all_procfile = proc_create_data(iface_stat_all_procfilename,
						   proc_iface_perms,
						   parent_procdir,
						   &proc_iface_stat_fmt_fops,
						   (void *)1 /* fmt1 */);
	if (!iface_stat_all_procfile) {
		pr_err("qtaguid: iface_stat: init "
		       " failed to create stat_old proc entry\n");
		err = -1;
		goto err_zap_entry;
	}

	iface_stat_fmt_procfile = proc_create_data(iface_stat_fmt_procfilename,
						   proc_iface_perms,
						   parent_procdir,
						   &proc_iface_stat_fmt_fops,
						   (void *)2 /* fmt2 */);
	if (!iface_stat_fmt_procfile) {
		pr_err("qtaguid: iface_stat: init "
		       " failed to create stat_all proc entry\n");
		err = -1;
		goto err_zap_all_stats_entry;
	}


	err = register_netdevice_notifier(&iface_netdev_notifier_blk);
	if (err) {
		pr_err("qtaguid: iface_stat: init "
		       "failed to register dev event handler\n");
		goto err_zap_all_stats_entries;
	}
	err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);
	if (err) {
		pr_err("qtaguid: iface_stat: init "
		       "failed to register ipv4 dev event handler\n");
		goto err_unreg_nd;
	}

	err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk);
	if (err) {
		pr_err("qtaguid: iface_stat: init "
		       "failed to register ipv6 dev event handler\n");
		goto err_unreg_ip4_addr;
	}
	return 0;

	/* Unwind in reverse order of the steps above. */
err_unreg_ip4_addr:
	unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk);
err_unreg_nd:
	unregister_netdevice_notifier(&iface_netdev_notifier_blk);
err_zap_all_stats_entries:
	remove_proc_entry(iface_stat_fmt_procfilename, parent_procdir);
err_zap_all_stats_entry:
	remove_proc_entry(iface_stat_all_procfilename, parent_procdir);
err_zap_entry:
	remove_proc_entry(iface_stat_procdirname, parent_procdir);
err:
	return err;
}
1572
1573 static struct sock *qtaguid_find_sk(const struct sk_buff *skb,
1574                                     struct xt_action_param *par)
1575 {
1576         struct sock *sk;
1577         unsigned int hook_mask = (1 << par->hooknum);
1578
1579         MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb,
1580                  par->hooknum, par->family);
1581
1582         /*
1583          * Let's not abuse the the xt_socket_get*_sk(), or else it will
1584          * return garbage SKs.
1585          */
1586         if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS))
1587                 return NULL;
1588
1589         switch (par->family) {
1590         case NFPROTO_IPV6:
1591                 sk = xt_socket_get6_sk(skb, par);
1592                 break;
1593         case NFPROTO_IPV4:
1594                 sk = xt_socket_get4_sk(skb, par);
1595                 break;
1596         default:
1597                 return NULL;
1598         }
1599
1600         if (sk) {
1601                 MT_DEBUG("qtaguid: %p->sk_proto=%u "
1602                          "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state);
1603                 /*
1604                  * When in TCP_TIME_WAIT the sk is not a "struct sock" but
1605                  * "struct inet_timewait_sock" which is missing fields.
1606                  */
1607                 if (sk->sk_state  == TCP_TIME_WAIT) {
1608                         xt_socket_put_sk(sk);
1609                         sk = NULL;
1610                 }
1611         }
1612         return sk;
1613 }
1614
1615 static void account_for_uid(const struct sk_buff *skb,
1616                             const struct sock *alternate_sk, uid_t uid,
1617                             struct xt_action_param *par)
1618 {
1619         const struct net_device *el_dev;
1620
1621         if (!skb->dev) {
1622                 MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
1623                 el_dev = par->in ? : par->out;
1624         } else {
1625                 const struct net_device *other_dev;
1626                 el_dev = skb->dev;
1627                 other_dev = par->in ? : par->out;
1628                 if (el_dev != other_dev) {
1629                         MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
1630                                 "par->(in/out)=%p %s\n",
1631                                 par->hooknum, el_dev, el_dev->name, other_dev,
1632                                 other_dev->name);
1633                 }
1634         }
1635
1636         if (unlikely(!el_dev)) {
1637                 pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum);
1638         } else if (unlikely(!el_dev->name)) {
1639                 pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum);
1640         } else {
1641                 int proto = ipx_proto(skb, par);
1642                 MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
1643                          par->hooknum, el_dev->name, el_dev->type,
1644                          par->family, proto);
1645
1646                 if_tag_stat_update(el_dev->name, uid,
1647                                 skb->sk ? skb->sk : alternate_sk,
1648                                 par->in ? IFS_RX : IFS_TX,
1649                                 proto, skb->len);
1650         }
1651 }
1652
/*
 * The xt_qtaguid match function. Besides deciding the match result from
 * the uid/gid/socket criteria in *info, it also accounts the packet's
 * bytes against the owning uid (iface and tag stats) as a side effect.
 * Note the info->match ^ info->invert idiom: a test passes when
 * (in range) XOR (inverted) — see the comment before the uid/gid checks.
 */
static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
	const struct xt_qtaguid_match_info *info = par->matchinfo;
	const struct file *filp;
	bool got_sock = false;
	struct sock *sk;
	uid_t sock_uid;
	bool res;

	/* Passive mode: do no accounting, report based only on invert bits. */
	if (unlikely(module_passive))
		return (info->match ^ info->invert) == 0;

	MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n",
		 par->hooknum, skb, par->in, par->out, par->family);

	atomic64_inc(&qtu_events.match_calls);
	if (skb == NULL) {
		res = (info->match ^ info->invert) == 0;
		goto ret_res;
	}

	switch (par->hooknum) {
	case NF_INET_PRE_ROUTING:
	case NF_INET_POST_ROUTING:
		atomic64_inc(&qtu_events.match_calls_prepost);
		iface_stat_update_from_skb(skb, par);
		/*
		 * We are done in pre/post. The skb will get processed
		 * further later.
		 */
		res = (info->match ^ info->invert);
		goto ret_res;
		break;
	/* default: Fall through and do UID related work */
	}

	sk = skb->sk;
	/*
	 * When in TCP_TIME_WAIT the sk is not a "struct sock" but
	 * "struct inet_timewait_sock" which is missing fields.
	 * So we ignore it.
	 */
	if (sk && sk->sk_state == TCP_TIME_WAIT)
		sk = NULL;
	if (sk == NULL) {
		/*
		 * A missing sk->sk_socket happens when packets are in-flight
		 * and the matching socket is already closed and gone.
		 */
		sk = qtaguid_find_sk(skb, par);
		/*
		 * If we got the socket from the find_sk(), we will need to put
		 * it back, as nf_tproxy_get_sock_v4() got it.
		 */
		got_sock = sk;
		if (sk)
			atomic64_inc(&qtu_events.match_found_sk_in_ct);
		else
			atomic64_inc(&qtu_events.match_found_no_sk_in_ct);
	} else {
		atomic64_inc(&qtu_events.match_found_sk);
	}
	MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d fam=%d proto=%d\n",
		 par->hooknum, sk, got_sock, par->family, ipx_proto(skb, par));
	if (sk != NULL) {
		MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n",
			par->hooknum, sk, sk->sk_socket,
			sk->sk_socket ? sk->sk_socket->file : (void *)-1LL);
		filp = sk->sk_socket ? sk->sk_socket->file : NULL;
		MT_DEBUG("qtaguid[%d]: filp...uid=%u\n",
			par->hooknum, filp ? filp->f_cred->fsuid : -1);
	}

	if (sk == NULL || sk->sk_socket == NULL) {
		/*
		 * Here, the qtaguid_find_sk() using connection tracking
		 * couldn't find the owner, so for now we just count them
		 * against the system.
		 */
		/*
		 * TODO: unhack how to force just accounting.
		 * For now we only do iface stats when the uid-owner is not
		 * requested.
		 */
		if (!(info->match & XT_QTAGUID_UID))
			account_for_uid(skb, sk, 0, par);
		MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n",
			par->hooknum,
			sk ? sk->sk_socket : NULL);
		res = (info->match ^ info->invert) == 0;
		atomic64_inc(&qtu_events.match_no_sk);
		goto put_sock_ret_res;
	} else if (info->match & info->invert & XT_QTAGUID_SOCKET) {
		/* "socket" test requested inverted, but a socket was found. */
		res = false;
		goto put_sock_ret_res;
	}
	filp = sk->sk_socket->file;
	if (filp == NULL) {
		/* Socket exists but has no file: count against uid 0. */
		MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum);
		account_for_uid(skb, sk, 0, par);
		res = ((info->match ^ info->invert) &
			(XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0;
		atomic64_inc(&qtu_events.match_no_sk_file);
		goto put_sock_ret_res;
	}
	sock_uid = filp->f_cred->fsuid;
	/*
	 * TODO: unhack how to force just accounting.
	 * For now we only do iface stats when the uid-owner is not requested
	 */
	if (!(info->match & XT_QTAGUID_UID))
		account_for_uid(skb, sk, sock_uid, par);

	/*
	 * The following two tests fail the match when:
	 *    id not in range AND no inverted condition requested
	 * or id     in range AND    inverted condition requested
	 * Thus (!a && b) || (a && !b) == a ^ b
	 */
	if (info->match & XT_QTAGUID_UID)
		if ((filp->f_cred->fsuid >= info->uid_min &&
		     filp->f_cred->fsuid <= info->uid_max) ^
		    !(info->invert & XT_QTAGUID_UID)) {
			MT_DEBUG("qtaguid[%d]: leaving uid not matching\n",
				 par->hooknum);
			res = false;
			goto put_sock_ret_res;
		}
	if (info->match & XT_QTAGUID_GID)
		if ((filp->f_cred->fsgid >= info->gid_min &&
				filp->f_cred->fsgid <= info->gid_max) ^
			!(info->invert & XT_QTAGUID_GID)) {
			MT_DEBUG("qtaguid[%d]: leaving gid not matching\n",
				par->hooknum);
			res = false;
			goto put_sock_ret_res;
		}

	MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum);
	res = true;

put_sock_ret_res:
	/* Only drop the ref we took via qtaguid_find_sk(), not skb->sk's. */
	if (got_sock)
		xt_socket_put_sk(sk);
ret_res:
	MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res);
	return res;
}
1801
#ifdef DDEBUG
/* This function is not in xt_qtaguid_print.c because of locks visibility */
/*
 * Dump the module's full internal state (sock tags, uid tag data, proc qtu
 * data, iface stats) to the kernel log, taking each list's lock while
 * printing it. No-op unless DDEBUG_MASK is set in qtaguid_debug_mask.
 * fmt/args only decorate the opening header line.
 */
static void prdebug_full_state(int indent_level, const char *fmt, ...)
{
	va_list args;
	char *fmt_buff;
	char *buff;

	if (!unlikely(qtaguid_debug_mask & DDEBUG_MASK))
		return;

	/* Build "qtaguid: <func>(): <fmt> {" then expand the caller's args. */
	fmt_buff = kasprintf(GFP_ATOMIC,
			     "qtaguid: %s(): %s {\n", __func__, fmt);
	BUG_ON(!fmt_buff);
	va_start(args, fmt);
	buff = kvasprintf(GFP_ATOMIC,
			  fmt_buff, args);
	BUG_ON(!buff);
	pr_debug("%s", buff);
	kfree(fmt_buff);
	kfree(buff);
	va_end(args);

	spin_lock_bh(&sock_tag_list_lock);
	prdebug_sock_tag_tree(indent_level, &sock_tag_tree);
	spin_unlock_bh(&sock_tag_list_lock);

	/*
	 * NOTE(review): lock order here is sock_tag_list_lock then
	 * uid_tag_data_tree_lock — presumably the module-wide order; any
	 * other taker must match it to avoid deadlock.
	 */
	spin_lock_bh(&sock_tag_list_lock);
	spin_lock_bh(&uid_tag_data_tree_lock);
	prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree);
	prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree);
	spin_unlock_bh(&uid_tag_data_tree_lock);
	spin_unlock_bh(&sock_tag_list_lock);

	spin_lock_bh(&iface_stat_list_lock);
	prdebug_iface_stat_list(indent_level, &iface_stat_list);
	spin_unlock_bh(&iface_stat_list_lock);

	pr_debug("qtaguid: %s(): }\n", __func__);
}
#else
/* Stub so callers need no #ifdef when DDEBUG is off. */
static void prdebug_full_state(int indent_level, const char *fmt, ...) {}
#endif
1845
/* Per-open cursor state for the ctrl seq_file iterator (m->private). */
struct proc_ctrl_print_info {
	struct sock *sk; /* socket found by reading to sk_pos */
	loff_t sk_pos; /* position that ->sk corresponds to; lets start() detect skipped next() calls */
};
1850
1851 static void *qtaguid_ctrl_proc_next(struct seq_file *m, void *v, loff_t *pos)
1852 {
1853         struct proc_ctrl_print_info *pcpi = m->private;
1854         struct sock_tag *sock_tag_entry = v;
1855         struct rb_node *node;
1856
1857         (*pos)++;
1858
1859         if (!v || v  == SEQ_START_TOKEN)
1860                 return NULL;
1861
1862         node = rb_next(&sock_tag_entry->sock_node);
1863         if (!node) {
1864                 pcpi->sk = NULL;
1865                 sock_tag_entry = SEQ_START_TOKEN;
1866         } else {
1867                 sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
1868                 pcpi->sk = sock_tag_entry->sk;
1869         }
1870         pcpi->sk_pos = *pos;
1871         return sock_tag_entry;
1872 }
1873
/*
 * seq_file ->start for the ctrl file. Takes sock_tag_list_lock, which is
 * released in qtaguid_ctrl_proc_stop() (seq_file guarantees stop() is
 * called after every start(), so the early NULL returns do not leak the
 * lock). On resume (*pos != 0) it re-finds the saved socket's entry via
 * get_sock_stat_nl(), since the tree may have changed between reads.
 */
static void *qtaguid_ctrl_proc_start(struct seq_file *m, loff_t *pos)
{
	struct proc_ctrl_print_info *pcpi = m->private;
	struct sock_tag *sock_tag_entry;
	struct rb_node *node;

	spin_lock_bh(&sock_tag_list_lock);

	if (unlikely(module_passive))
		return NULL;

	if (*pos == 0) {
		pcpi->sk_pos = 0;
		node = rb_first(&sock_tag_tree);
		if (!node) {
			/* Empty tree: only the events summary is printed. */
			pcpi->sk = NULL;
			return SEQ_START_TOKEN;
		}
		sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
		pcpi->sk = sock_tag_entry->sk;
	} else {
		/* Saved sk may be gone; fall back to the summary token. */
		sock_tag_entry = (pcpi->sk ? get_sock_stat_nl(pcpi->sk) :
						NULL) ?: SEQ_START_TOKEN;
		if (*pos != pcpi->sk_pos) {
			/* seq_read skipped a next call */
			*pos = pcpi->sk_pos;
			return qtaguid_ctrl_proc_next(m, sock_tag_entry, pos);
		}
	}
	return sock_tag_entry;
}
1905
/* seq_file ->stop: releases the lock taken in qtaguid_ctrl_proc_start(). */
static void qtaguid_ctrl_proc_stop(struct seq_file *m, void *v)
{
	spin_unlock_bh(&sock_tag_list_lock);
}
1910
1911 /*
1912  * Procfs reader to get all active socket tags using style "1)" as described in
1913  * fs/proc/generic.c
1914  */
/*
 * seq_file ->show for the ctrl file: prints one line per sock_tag entry,
 * and — when v is SEQ_START_TOKEN (empty tree, or after the last entry) —
 * the module-wide event counters summary. Runs with sock_tag_list_lock
 * held by start()/next().
 */
static int qtaguid_ctrl_proc_show(struct seq_file *m, void *v)
{
	struct sock_tag *sock_tag_entry = v;
	uid_t uid;
	long f_count;

	CT_DEBUG("qtaguid: proc ctrl pid=%u tgid=%u uid=%u\n",
		 current->pid, current->tgid, current_fsuid());

	if (sock_tag_entry != SEQ_START_TOKEN) {
		uid = get_uid_from_tag(sock_tag_entry->tag);
		CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) "
			 "pid=%u\n",
			 sock_tag_entry->sk,
			 sock_tag_entry->tag,
			 uid,
			 sock_tag_entry->pid
			);
		/* f_count exposes the ref held since tagging (sockfd_lookup). */
		f_count = atomic_long_read(
			&sock_tag_entry->socket->file->f_count);
		seq_printf(m, "sock=%p tag=0x%llx (uid=%u) pid=%u "
			   "f_count=%lu\n",
			   sock_tag_entry->sk,
			   sock_tag_entry->tag, uid,
			   sock_tag_entry->pid, f_count);
	} else {
		seq_printf(m, "events: sockets_tagged=%llu "
			   "sockets_untagged=%llu "
			   "counter_set_changes=%llu "
			   "delete_cmds=%llu "
			   "iface_events=%llu "
			   "match_calls=%llu "
			   "match_calls_prepost=%llu "
			   "match_found_sk=%llu "
			   "match_found_sk_in_ct=%llu "
			   "match_found_no_sk_in_ct=%llu "
			   "match_no_sk=%llu "
			   "match_no_sk_file=%llu\n",
			   atomic64_read(&qtu_events.sockets_tagged),
			   atomic64_read(&qtu_events.sockets_untagged),
			   atomic64_read(&qtu_events.counter_set_changes),
			   atomic64_read(&qtu_events.delete_cmds),
			   atomic64_read(&qtu_events.iface_events),
			   atomic64_read(&qtu_events.match_calls),
			   atomic64_read(&qtu_events.match_calls_prepost),
			   atomic64_read(&qtu_events.match_found_sk),
			   atomic64_read(&qtu_events.match_found_sk_in_ct),
			   atomic64_read(&qtu_events.match_found_no_sk_in_ct),
			   atomic64_read(&qtu_events.match_no_sk),
			   atomic64_read(&qtu_events.match_no_sk_file));

		/* Count the following as part of the last item_index */
		prdebug_full_state(0, "proc ctrl");
	}

	return 0;
}
1972
1973 /*
1974  * Delete socket tags, and stat tags associated with a given
1975  * accouting tag and uid.
1976  */
static int ctrl_cmd_delete(const char *input)
{
	char cmd;
	uid_t uid;
	uid_t entry_uid;
	tag_t acct_tag;
	tag_t tag;
	int res, argc;
	struct iface_stat *iface_entry;
	struct rb_node *node;
	struct sock_tag *st_entry;
	struct rb_root st_to_free_tree = RB_ROOT;
	struct tag_stat *ts_entry;
	struct tag_counter_set *tcs_entry;
	struct tag_ref *tr_entry;
	struct uid_tag_data *utd_entry;

	/* Input format: "d <acct_tag> [<uid>]"; uid defaults to caller's. */
	argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid);
	CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c "
		 "user_tag=0x%llx uid=%u\n", input, argc, cmd,
		 acct_tag, uid);
	if (argc < 2) {
		res = -EINVAL;
		goto err;
	}
	if (!valid_atag(acct_tag)) {
		pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input);
		res = -EINVAL;
		goto err;
	}
	if (argc < 3) {
		uid = current_fsuid();
	} else if (!can_impersonate_uid(uid)) {
		pr_info("qtaguid: ctrl_delete(%s): "
			"insufficient priv from pid=%u tgid=%u uid=%u\n",
			input, current->pid, current->tgid, current_fsuid());
		res = -EPERM;
		goto err;
	}

	/* acct_tag == 0 means "all tags of this uid" in the loops below. */
	tag = combine_atag_with_uid(acct_tag, uid);
	CT_DEBUG("qtaguid: ctrl_delete(%s): "
		 "looking for tag=0x%llx (uid=%u)\n",
		 input, tag, uid);

	/* Delete socket tags */
	spin_lock_bh(&sock_tag_list_lock);
	node = rb_first(&sock_tag_tree);
	while (node) {
		st_entry = rb_entry(node, struct sock_tag, sock_node);
		entry_uid = get_uid_from_tag(st_entry->tag);
		/* Advance before a possible rb_erase() invalidates node. */
		node = rb_next(node);
		if (entry_uid != uid)
			continue;

		CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n",
			 input, st_entry->tag, entry_uid);

		if (!acct_tag || st_entry->tag == tag) {
			rb_erase(&st_entry->sock_node, &sock_tag_tree);
			/* Can't sockfd_put() within spinlock, do it later. */
			sock_tag_tree_insert(st_entry, &st_to_free_tree);
			tr_entry = lookup_tag_ref(st_entry->tag, NULL);
			BUG_ON(tr_entry->num_sock_tags <= 0);
			tr_entry->num_sock_tags--;
			/*
			 * TODO: remove if, and start failing.
			 * This is a hack to work around the fact that in some
			 * places we have "if (IS_ERR_OR_NULL(pqd_entry))"
			 * and are trying to work around apps
			 * that didn't open the /dev/xt_qtaguid.
			 */
			if (st_entry->list.next && st_entry->list.prev)
				list_del(&st_entry->list);
		}
	}
	spin_unlock_bh(&sock_tag_list_lock);

	/* Frees the collected entries and drops their socket refs. */
	sock_tag_tree_erase(&st_to_free_tree);

	/* Delete tag counter-sets */
	spin_lock_bh(&tag_counter_set_list_lock);
	/* Counter sets are only on the uid tag, not full tag */
	tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
	if (tcs_entry) {
		CT_DEBUG("qtaguid: ctrl_delete(%s): "
			 "erase tcs: tag=0x%llx (uid=%u) set=%d\n",
			 input,
			 tcs_entry->tn.tag,
			 get_uid_from_tag(tcs_entry->tn.tag),
			 tcs_entry->active_set);
		rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree);
		kfree(tcs_entry);
	}
	spin_unlock_bh(&tag_counter_set_list_lock);

	/*
	 * If acct_tag is 0, then all entries belonging to uid are
	 * erased.
	 */
	spin_lock_bh(&iface_stat_list_lock);
	list_for_each_entry(iface_entry, &iface_stat_list, list) {
		spin_lock_bh(&iface_entry->tag_stat_list_lock);
		node = rb_first(&iface_entry->tag_stat_tree);
		while (node) {
			ts_entry = rb_entry(node, struct tag_stat, tn.node);
			entry_uid = get_uid_from_tag(ts_entry->tn.tag);
			node = rb_next(node);

			CT_DEBUG("qtaguid: ctrl_delete(%s): "
				 "ts tag=0x%llx (uid=%u)\n",
				 input, ts_entry->tn.tag, entry_uid);

			if (entry_uid != uid)
				continue;
			if (!acct_tag || ts_entry->tn.tag == tag) {
				CT_DEBUG("qtaguid: ctrl_delete(%s): "
					 "erase ts: %s 0x%llx %u\n",
					 input, iface_entry->ifname,
					 get_atag_from_tag(ts_entry->tn.tag),
					 entry_uid);
				rb_erase(&ts_entry->tn.node,
					 &iface_entry->tag_stat_tree);
				kfree(ts_entry);
			}
		}
		spin_unlock_bh(&iface_entry->tag_stat_list_lock);
	}
	spin_unlock_bh(&iface_stat_list_lock);

	/* Cleanup the uid_tag_data */
	spin_lock_bh(&uid_tag_data_tree_lock);
	node = rb_first(&uid_tag_data_tree);
	while (node) {
		utd_entry = rb_entry(node, struct uid_tag_data, node);
		entry_uid = utd_entry->uid;
		node = rb_next(node);

		CT_DEBUG("qtaguid: ctrl_delete(%s): "
			 "utd uid=%u\n",
			 input, entry_uid);

		if (entry_uid != uid)
			continue;
		/*
		 * Go over the tag_refs, and those that don't have
		 * sock_tags using them are freed.
		 */
		put_tag_ref_tree(tag, utd_entry);
		put_utd_entry(utd_entry);
	}
	spin_unlock_bh(&uid_tag_data_tree_lock);

	atomic64_inc(&qtu_events.delete_cmds);
	res = 0;

err:
	return res;
}
2136
2137 static int ctrl_cmd_counter_set(const char *input)
2138 {
2139         char cmd;
2140         uid_t uid = 0;
2141         tag_t tag;
2142         int res, argc;
2143         struct tag_counter_set *tcs;
2144         int counter_set;
2145
2146         argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid);
2147         CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c "
2148                  "set=%d uid=%u\n", input, argc, cmd,
2149                  counter_set, uid);
2150         if (argc != 3) {
2151                 res = -EINVAL;
2152                 goto err;
2153         }
2154         if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) {
2155                 pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n",
2156                         input);
2157                 res = -EINVAL;
2158                 goto err;
2159         }
2160         if (!can_manipulate_uids()) {
2161                 pr_info("qtaguid: ctrl_counterset(%s): "
2162                         "insufficient priv from pid=%u tgid=%u uid=%u\n",
2163                         input, current->pid, current->tgid, current_fsuid());
2164                 res = -EPERM;
2165                 goto err;
2166         }
2167
2168         tag = make_tag_from_uid(uid);
2169         spin_lock_bh(&tag_counter_set_list_lock);
2170         tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
2171         if (!tcs) {
2172                 tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC);
2173                 if (!tcs) {
2174                         spin_unlock_bh(&tag_counter_set_list_lock);
2175                         pr_err("qtaguid: ctrl_counterset(%s): "
2176                                "failed to alloc counter set\n",
2177                                input);
2178                         res = -ENOMEM;
2179                         goto err;
2180                 }
2181                 tcs->tn.tag = tag;
2182                 tag_counter_set_tree_insert(tcs, &tag_counter_set_tree);
2183                 CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx "
2184                          "(uid=%u) set=%d\n",
2185                          input, tag, get_uid_from_tag(tag), counter_set);
2186         }
2187         tcs->active_set = counter_set;
2188         spin_unlock_bh(&tag_counter_set_list_lock);
2189         atomic64_inc(&qtu_events.counter_set_changes);
2190         res = 0;
2191
2192 err:
2193         return res;
2194 }
2195
2196 static int ctrl_cmd_tag(const char *input)
2197 {
2198         char cmd;
2199         int sock_fd = 0;
2200         uid_t uid = 0;
2201         tag_t acct_tag = make_atag_from_value(0);
2202         tag_t full_tag;
2203         struct socket *el_socket;
2204         int res, argc;
2205         struct sock_tag *sock_tag_entry;
2206         struct tag_ref *tag_ref_entry;
2207         struct uid_tag_data *uid_tag_data_entry;
2208         struct proc_qtu_data *pqd_entry;
2209
2210         /* Unassigned args will get defaulted later. */
2211         argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid);
2212         CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d "
2213                  "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd,
2214                  acct_tag, uid);
2215         if (argc < 2) {
2216                 res = -EINVAL;
2217                 goto err;
2218         }
2219         el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
2220         if (!el_socket) {
2221                 pr_info("qtaguid: ctrl_tag(%s): failed to lookup"
2222                         " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
2223                         input, sock_fd, res, current->pid, current->tgid,
2224                         current_fsuid());
2225                 goto err;
2226         }
2227         CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n",
2228                  input, atomic_long_read(&el_socket->file->f_count),
2229                  el_socket->sk);
2230         if (argc < 3) {
2231                 acct_tag = make_atag_from_value(0);
2232         } else if (!valid_atag(acct_tag)) {
2233                 pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input);
2234                 res = -EINVAL;
2235                 goto err_put;
2236         }
2237         CT_DEBUG("qtaguid: ctrl_tag(%s): "
2238                  "pid=%u tgid=%u uid=%u euid=%u fsuid=%u "
2239                  "ctrl.gid=%u in_group()=%d in_egroup()=%d\n",
2240                  input, current->pid, current->tgid, current_uid(),
2241                  current_euid(), current_fsuid(),
2242                  xt_qtaguid_ctrl_file->gid,
2243                  in_group_p(xt_qtaguid_ctrl_file->gid),
2244                  in_egroup_p(xt_qtaguid_ctrl_file->gid));
2245         if (argc < 4) {
2246                 uid = current_fsuid();
2247         } else if (!can_impersonate_uid(uid)) {
2248                 pr_info("qtaguid: ctrl_tag(%s): "
2249                         "insufficient priv from pid=%u tgid=%u uid=%u\n",
2250                         input, current->pid, current->tgid, current_fsuid());
2251                 res = -EPERM;
2252                 goto err_put;
2253         }
2254         full_tag = combine_atag_with_uid(acct_tag, uid);
2255
2256         spin_lock_bh(&sock_tag_list_lock);
2257         sock_tag_entry = get_sock_stat_nl(el_socket->sk);
2258         tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry);
2259         if (IS_ERR(tag_ref_entry)) {
2260                 res = PTR_ERR(tag_ref_entry);
2261                 spin_unlock_bh(&sock_tag_list_lock);
2262                 goto err_put;
2263         }
2264         tag_ref_entry->num_sock_tags++;
2265         if (sock_tag_entry) {
2266                 struct tag_ref *prev_tag_ref_entry;
2267
2268                 CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p "
2269                          "st@%p ...->f_count=%ld\n",
2270                          input, el_socket->sk, sock_tag_entry,
2271                          atomic_long_read(&el_socket->file->f_count));
2272                 /*
2273                  * This is a re-tagging, so release the sock_fd that was
2274                  * locked at the time of the 1st tagging.
2275                  * There is still the ref from this call's sockfd_lookup() so
2276                  * it can be done within the spinlock.
2277                  */
2278                 sockfd_put(sock_tag_entry->socket);
2279                 prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag,
2280                                                     &uid_tag_data_entry);
2281                 BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry));
2282                 BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0);
2283                 prev_tag_ref_entry->num_sock_tags--;
2284                 sock_tag_entry->tag = full_tag;
2285         } else {
2286                 CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n",
2287                          input, el_socket->sk);
2288                 sock_tag_entry = kzalloc(sizeof(*sock_tag_entry),
2289                                          GFP_ATOMIC);
2290                 if (!sock_tag_entry) {
2291                         pr_err("qtaguid: ctrl_tag(%s): "
2292                                "socket tag alloc failed\n",
2293                                input);
2294                         spin_unlock_bh(&sock_tag_list_lock);
2295                         res = -ENOMEM;
2296                         goto err_tag_unref_put;
2297                 }
2298                 sock_tag_entry->sk = el_socket->sk;
2299                 sock_tag_entry->socket = el_socket;
2300                 sock_tag_entry->pid = current->tgid;
2301                 sock_tag_entry->tag = combine_atag_with_uid(acct_tag,
2302                                                             uid);
2303                 spin_lock_bh(&uid_tag_data_tree_lock);
2304                 pqd_entry = proc_qtu_data_tree_search(
2305                         &proc_qtu_data_tree, current->tgid);
2306                 /*
2307                  * TODO: remove if, and start failing.
2308                  * At first, we want to catch user-space code that is not
2309                  * opening the /dev/xt_qtaguid.
2310                  */
2311                 if (IS_ERR_OR_NULL(pqd_entry))
2312                         pr_warn_once(
2313                                 "qtaguid: %s(): "
2314                                 "User space forgot to open /dev/xt_qtaguid? "
2315                                 "pid=%u tgid=%u uid=%u\n", __func__,
2316                                 current->pid, current->tgid,
2317                                 current_fsuid());
2318                 else
2319                         list_add(&sock_tag_entry->list,
2320                                  &pqd_entry->sock_tag_list);
2321                 spin_unlock_bh(&uid_tag_data_tree_lock);
2322
2323                 sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree);
2324                 atomic64_inc(&qtu_events.sockets_tagged);
2325         }
2326         spin_unlock_bh(&sock_tag_list_lock);
2327         /* We keep the ref to the socket (file) until it is untagged */
2328         CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->f_count=%ld\n",
2329                  input, sock_tag_entry,
2330                  atomic_long_read(&el_socket->file->f_count));
2331         return 0;
2332
2333 err_tag_unref_put:
2334         BUG_ON(tag_ref_entry->num_sock_tags <= 0);
2335         tag_ref_entry->num_sock_tags--;
2336         free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry);
2337 err_put:
2338         CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->f_count=%ld\n",
2339                  input, atomic_long_read(&el_socket->file->f_count) - 1);
2340         /* Release the sock_fd that was grabbed by sockfd_lookup(). */
2341         sockfd_put(el_socket);
2342         return res;
2343
2344 err:
2345         CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input);
2346         return res;
2347 }
2348
/*
 * Handle the "u <sock_fd>" ctrl command: remove the tag from the given
 * socket fd and drop the socket reference that was taken when it was
 * tagged.
 * Returns 0 on success or a negative errno.
 */
static int ctrl_cmd_untag(const char *input)
{
	char cmd;
	int sock_fd = 0;
	struct socket *el_socket;
	int res, argc;
	struct sock_tag *sock_tag_entry;
	struct tag_ref *tag_ref_entry;
	struct uid_tag_data *utd_entry;
	struct proc_qtu_data *pqd_entry;

	/* Expected input: "u <sock_fd>" */
	argc = sscanf(input, "%c %d", &cmd, &sock_fd);
	CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n",
		 input, argc, cmd, sock_fd);
	if (argc < 2) {
		res = -EINVAL;
		goto err;
	}
	el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
	if (!el_socket) {
		pr_info("qtaguid: ctrl_untag(%s): failed to lookup"
			" sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
			input, sock_fd, res, current->pid, current->tgid,
			current_fsuid());
		goto err;
	}
	CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n",
		 input, atomic_long_read(&el_socket->file->f_count),
		 el_socket->sk);
	spin_lock_bh(&sock_tag_list_lock);
	sock_tag_entry = get_sock_stat_nl(el_socket->sk);
	if (!sock_tag_entry) {
		/* Socket was never tagged: nothing to untag. */
		spin_unlock_bh(&sock_tag_list_lock);
		res = -EINVAL;
		goto err_put;
	}
	/*
	 * The socket already belongs to the current process
	 * so it can do whatever it wants to it.
	 */
	rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree);

	tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, &utd_entry);
	BUG_ON(!tag_ref_entry);
	BUG_ON(tag_ref_entry->num_sock_tags <= 0);
	spin_lock_bh(&uid_tag_data_tree_lock);
	pqd_entry = proc_qtu_data_tree_search(
		&proc_qtu_data_tree, current->tgid);
	/*
	 * TODO: remove if, and start failing.
	 * At first, we want to catch user-space code that is not
	 * opening the /dev/xt_qtaguid.
	 */
	if (IS_ERR_OR_NULL(pqd_entry))
		pr_warn_once("qtaguid: %s(): "
			     "User space forgot to open /dev/xt_qtaguid? "
			     "pid=%u tgid=%u uid=%u\n", __func__,
			     current->pid, current->tgid, current_fsuid());
	else
		/* Unlink from the per-process list used at qtudev_release(). */
		list_del(&sock_tag_entry->list);
	spin_unlock_bh(&uid_tag_data_tree_lock);
	/*
	 * We don't free tag_ref from the utd_entry here,
	 * only during a cmd_delete().
	 */
	tag_ref_entry->num_sock_tags--;
	spin_unlock_bh(&sock_tag_list_lock);
	/*
	 * Release the sock_fd that was grabbed at tag time,
	 * and once more for the sockfd_lookup() here.
	 */
	sockfd_put(sock_tag_entry->socket);
	CT_DEBUG("qtaguid: ctrl_untag(%s): done. st@%p ...->f_count=%ld\n",
		 input, sock_tag_entry,
		 atomic_long_read(&el_socket->file->f_count) - 1);
	sockfd_put(el_socket);

	kfree(sock_tag_entry);
	atomic64_inc(&qtu_events.sockets_untagged);

	return 0;

err_put:
	CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%ld\n",
		 input, atomic_long_read(&el_socket->file->f_count) - 1);
	/* Release the sock_fd that was grabbed by sockfd_lookup(). */
	sockfd_put(el_socket);
	return res;

err:
	CT_DEBUG("qtaguid: ctrl_untag(%s): done.\n", input);
	return res;
}
2442
2443 static int qtaguid_ctrl_parse(const char *input, int count)
2444 {
2445         char cmd;
2446         int res;
2447
2448         CT_DEBUG("qtaguid: ctrl(%s): pid=%u tgid=%u uid=%u\n",
2449                  input, current->pid, current->tgid, current_fsuid());
2450
2451         cmd = input[0];
2452         /* Collect params for commands */
2453         switch (cmd) {
2454         case 'd':
2455                 res = ctrl_cmd_delete(input);
2456                 break;
2457
2458         case 's':
2459                 res = ctrl_cmd_counter_set(input);
2460                 break;
2461
2462         case 't':
2463                 res = ctrl_cmd_tag(input);
2464                 break;
2465
2466         case 'u':
2467                 res = ctrl_cmd_untag(input);
2468                 break;
2469
2470         default:
2471                 res = -EINVAL;
2472                 goto err;
2473         }
2474         if (!res)
2475                 res = count;
2476 err:
2477         CT_DEBUG("qtaguid: ctrl(%s): res=%d\n", input, res);
2478         return res;
2479 }
2480
2481 #define MAX_QTAGUID_CTRL_INPUT_LEN 255
2482 static int qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer,
2483                                    size_t count, loff_t *offp)
2484 {
2485         char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN];
2486
2487         if (unlikely(module_passive))
2488                 return count;
2489
2490         if (count >= MAX_QTAGUID_CTRL_INPUT_LEN)
2491                 return -EINVAL;
2492
2493         if (copy_from_user(input_buf, buffer, count))
2494                 return -EFAULT;
2495
2496         input_buf[count] = '\0';
2497         return qtaguid_ctrl_parse(input_buf, count);
2498 }
2499
/*
 * Per-open-file iterator state for the /proc/.../stats seq_file,
 * allocated by seq_open_private() in proc_qtaguid_stats_open().
 */
struct proc_print_info {
	struct iface_stat *iface_entry;	/* iface currently being dumped */
	int item_index;			/* running output line index */
	tag_t tag; /* tag found by reading to tag_pos */
	off_t tag_pos;			/* seq position recorded for that tag */
	int tag_item_index;		/* item_index saved at tag_pos */
};
2507
/* Emit the single column-name header line of the stats output. */
static void pp_stats_header(struct seq_file *m)
{
	seq_puts(m,
		 "idx iface acct_tag_hex uid_tag_int cnt_set "
		 "rx_bytes rx_packets "
		 "tx_bytes tx_packets "
		 "rx_tcp_bytes rx_tcp_packets "
		 "rx_udp_bytes rx_udp_packets "
		 "rx_other_bytes rx_other_packets "
		 "tx_tcp_bytes tx_tcp_packets "
		 "tx_udp_bytes tx_udp_packets "
		 "tx_other_bytes tx_other_packets\n");
}
2521
/*
 * Emit one stats line for @ts_entry / @cnt_set.
 * Lines carrying another uid's accounting tag are silently skipped when
 * the reader lacks the privilege to see other uids' stats.
 * Returns: 0 if the line was skipped, 1 if it was printed, or the
 * negative seq_printf() result when the seq buffer is full.
 */
static int pp_stats_line(struct seq_file *m, struct tag_stat *ts_entry,
			 int cnt_set)
{
	int ret;
	struct data_counters *cnts;
	tag_t tag = ts_entry->tn.tag;
	uid_t stat_uid = get_uid_from_tag(tag);
	struct proc_print_info *ppi = m->private;
	/* Detailed tags are not available to everybody */
	if (get_atag_from_tag(tag) && !can_read_other_uid_stats(stat_uid)) {
		CT_DEBUG("qtaguid: stats line: "
			 "%s 0x%llx %u: insufficient priv "
			 "from pid=%u tgid=%u uid=%u stats.gid=%u\n",
			 ppi->iface_entry->ifname,
			 get_atag_from_tag(tag), stat_uid,
			 current->pid, current->tgid, current_fsuid(),
			 xt_qtaguid_stats_file->gid);
		return 0;
	}
	ppi->item_index++;
	cnts = &ts_entry->counters;
	/* Column order must match the header in pp_stats_header(). */
	ret = seq_printf(m, "%d %s 0x%llx %u %u "
		"%llu %llu "
		"%llu %llu "
		"%llu %llu "
		"%llu %llu "
		"%llu %llu "
		"%llu %llu "
		"%llu %llu "
		"%llu %llu\n",
		ppi->item_index,
		ppi->iface_entry->ifname,
		get_atag_from_tag(tag),
		stat_uid,
		cnt_set,
		dc_sum_bytes(cnts, cnt_set, IFS_RX),
		dc_sum_packets(cnts, cnt_set, IFS_RX),
		dc_sum_bytes(cnts, cnt_set, IFS_TX),
		dc_sum_packets(cnts, cnt_set, IFS_TX),
		cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
		cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
		cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
		cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
		cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
		cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
		cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
		cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
		cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
		cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
		cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
		cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
	/* seq_printf() returns 0 on success here; map that to 1 "printed". */
	return ret ?: 1;
}
2575
2576 static bool pp_sets(struct seq_file *m, struct tag_stat *ts_entry)
2577 {
2578         int ret;
2579         int counter_set;
2580         for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS;
2581              counter_set++) {
2582                 ret = pp_stats_line(m, ts_entry, counter_set);
2583                 if (ret < 0)
2584                         return false;
2585         }
2586         return true;
2587 }
2588
2589 static int qtaguid_stats_proc_iface_stat_ptr_valid(struct iface_stat *ptr)
2590 {
2591         struct iface_stat *iface_entry;
2592
2593         if (!ptr)
2594                 return false;
2595
2596         list_for_each_entry(iface_entry, &iface_stat_list, list)
2597                 if (iface_entry == ptr)
2598                         return true;
2599         return false;
2600 }
2601
/*
 * Advance ppi->iface_entry to the next iface on iface_stat_list,
 * dropping the current iface's tag_stat_list_lock and taking the next
 * one's. At end of list, ppi->iface_entry is set to NULL with no
 * per-iface lock held. Caller must hold iface_stat_list_lock.
 */
static void qtaguid_stats_proc_next_iface_entry(struct proc_print_info *ppi)
{
	spin_unlock_bh(&ppi->iface_entry->tag_stat_list_lock);
	/* The loop body runs at most once: it locks and returns. */
	list_for_each_entry_continue(ppi->iface_entry, &iface_stat_list, list) {
		spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);
		return;
	}
	ppi->iface_entry = NULL;
}
2611
/*
 * seq_file .next: step to the next tag_stat, walking the current
 * iface's rb-tree and moving to the next iface (with lock hand-off)
 * whenever a tree is exhausted. Records the tag and position so that a
 * later ->start() can resume from the same spot.
 */
static void *qtaguid_stats_proc_next(struct seq_file *m, void *v, loff_t *pos)
{
	struct proc_print_info *ppi = m->private;
	struct tag_stat *ts_entry;
	struct rb_node *node;

	if (!v) {
		pr_err("qtaguid: %s(): unexpected v: NULL\n", __func__);
		return NULL;
	}

	(*pos)++;

	if (!ppi->iface_entry || unlikely(module_passive))
		return NULL;

	if (v == SEQ_START_TOKEN)
		node = rb_first(&ppi->iface_entry->tag_stat_tree);
	else
		node = rb_next(&((struct tag_stat *)v)->tn.node);

	/* Skip over ifaces whose tag_stat trees are empty. */
	while (!node) {
		qtaguid_stats_proc_next_iface_entry(ppi);
		if (!ppi->iface_entry)
			return NULL;
		node = rb_first(&ppi->iface_entry->tag_stat_tree);
	}

	ts_entry = rb_entry(node, struct tag_stat, tn.node);
	/* Remember where we are so ->start() can resume after a flush. */
	ppi->tag = ts_entry->tn.tag;
	ppi->tag_pos = *pos;
	ppi->tag_item_index = ppi->item_index;
	return ts_entry;
}
2646
/*
 * seq_file .start: take iface_stat_list_lock (held until ->stop()) and
 * position the iterator. *pos == 0 begins a fresh dump; otherwise the
 * iterator resumes from the tag/position cached by the last ->next(),
 * re-validating the cached iface_entry pointer since the iface may have
 * been removed between reads.
 */
static void *qtaguid_stats_proc_start(struct seq_file *m, loff_t *pos)
{
	struct proc_print_info *ppi = m->private;
	struct tag_stat *ts_entry = NULL;

	spin_lock_bh(&iface_stat_list_lock);

	if (*pos == 0) {
		/* Fresh dump: start with the first iface, if any. */
		ppi->item_index = 1;
		ppi->tag_pos = 0;
		if (list_empty(&iface_stat_list)) {
			ppi->iface_entry = NULL;
		} else {
			ppi->iface_entry = list_first_entry(&iface_stat_list,
							    struct iface_stat,
							    list);
			spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);
		}
		return SEQ_START_TOKEN;
	}
	if (!qtaguid_stats_proc_iface_stat_ptr_valid(ppi->iface_entry)) {
		if (ppi->iface_entry) {
			pr_err("qtaguid: %s(): iface_entry %p not found\n",
			       __func__, ppi->iface_entry);
			ppi->iface_entry = NULL;
		}
		return NULL;
	}

	spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);

	if (!ppi->tag_pos) {
		/* seq_read skipped first next call */
		ts_entry = SEQ_START_TOKEN;
	} else {
		ts_entry = tag_stat_tree_search(
				&ppi->iface_entry->tag_stat_tree, ppi->tag);
		if (!ts_entry) {
			/* ->stop() still releases both locks on this path. */
			pr_info("qtaguid: %s(): tag_stat.tag 0x%llx not found. Abort.\n",
				__func__, ppi->tag);
			return NULL;
		}
	}

	if (*pos == ppi->tag_pos) { /* normal resume */
		ppi->item_index = ppi->tag_item_index;
	} else {
		/* seq_read skipped a next call */
		*pos = ppi->tag_pos;
		ts_entry = qtaguid_stats_proc_next(m, ts_entry, pos);
	}

	return ts_entry;
}
2701
2702 static void qtaguid_stats_proc_stop(struct seq_file *m, void *v)
2703 {
2704         struct proc_print_info *ppi = m->private;
2705         if (ppi->iface_entry)
2706                 spin_unlock_bh(&ppi->iface_entry->tag_stat_list_lock);
2707         spin_unlock_bh(&iface_stat_list_lock);
2708 }
2709
2710 /*
2711  * Procfs reader to get all tag stats using style "1)" as described in
2712  * fs/proc/generic.c
2713  * Groups all protocols tx/rx bytes.
2714  */
2715 static int qtaguid_stats_proc_show(struct seq_file *m, void *v)
2716 {
2717         struct tag_stat *ts_entry = v;
2718
2719         if (v == SEQ_START_TOKEN)
2720                 pp_stats_header(m);
2721         else
2722                 pp_sets(m, ts_entry);
2723
2724         return 0;
2725 }
2726
2727 /*------------------------------------------*/
/*
 * /dev/xt_qtaguid open handler: registers the opening process (keyed by
 * tgid) in proc_qtu_data_tree so its socket tags can be cleaned up at
 * qtudev_release() time. Fails with -EBUSY if the process already has
 * the device open.
 */
static int qtudev_open(struct inode *inode, struct file *file)
{
	struct uid_tag_data *utd_entry;
	struct proc_qtu_data  *pqd_entry;
	struct proc_qtu_data  *new_pqd_entry;
	int res;
	bool utd_entry_found;

	if (unlikely(qtu_proc_handling_passive))
		return 0;

	DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n",
		 current->pid, current->tgid, current_fsuid());

	spin_lock_bh(&uid_tag_data_tree_lock);

	/* Look for existing uid data, or alloc one. */
	utd_entry = get_uid_data(current_fsuid(), &utd_entry_found);
	if (IS_ERR_OR_NULL(utd_entry)) {
		/*
		 * NOTE(review): if get_uid_data() can return NULL rather
		 * than an ERR_PTR, PTR_ERR() yields 0 here and the caller
		 * would see success -- confirm against its definition.
		 */
		res = PTR_ERR(utd_entry);
		goto err_unlock;
	}

	/* Look for existing PID based proc_data */
	pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree,
					      current->tgid);
	if (pqd_entry) {
		pr_err("qtaguid: qtudev_open(): %u/%u %u "
		       "%s already opened\n",
		       current->pid, current->tgid, current_fsuid(),
		       QTU_DEV_NAME);
		res = -EBUSY;
		goto err_unlock_free_utd;
	}

	/* GFP_ATOMIC: allocating under a BH-disabled spinlock. */
	new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC);
	if (!new_pqd_entry) {
		pr_err("qtaguid: qtudev_open(): %u/%u %u: "
		       "proc data alloc failed\n",
		       current->pid, current->tgid, current_fsuid());
		res = -ENOMEM;
		goto err_unlock_free_utd;
	}
	new_pqd_entry->pid = current->tgid;
	INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list);
	new_pqd_entry->parent_tag_data = utd_entry;
	utd_entry->num_pqd++;

	proc_qtu_data_tree_insert(new_pqd_entry,
				  &proc_qtu_data_tree);

	spin_unlock_bh(&uid_tag_data_tree_lock);
	DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n",
		 current_fsuid(), new_pqd_entry);
	file->private_data = new_pqd_entry;
	return 0;

err_unlock_free_utd:
	/* Only free the uid entry if this open created it. */
	if (!utd_entry_found) {
		rb_erase(&utd_entry->node, &uid_tag_data_tree);
		kfree(utd_entry);
	}
err_unlock:
	spin_unlock_bh(&uid_tag_data_tree_lock);
	return res;
}
2794
/*
 * /dev/xt_qtaguid release handler: untags every socket still tagged by
 * this process, then removes and frees the process entry from
 * proc_qtu_data_tree.
 */
static int qtudev_release(struct inode *inode, struct file *file)
{
	struct proc_qtu_data  *pqd_entry = file->private_data;
	struct uid_tag_data  *utd_entry = pqd_entry->parent_tag_data;
	struct sock_tag *st_entry;
	struct rb_root st_to_free_tree = RB_ROOT;
	struct list_head *entry, *next;
	struct tag_ref *tr;

	if (unlikely(qtu_proc_handling_passive))
		return 0;

	/*
	 * Do not trust the current->pid, it might just be a kworker cleaning
	 * up after a dead proc.
	 */
	DR_DEBUG("qtaguid: qtudev_release(): "
		 "pid=%u tgid=%u uid=%u "
		 "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n",
		 current->pid, current->tgid, pqd_entry->parent_tag_data->uid,
		 pqd_entry, pqd_entry->pid, utd_entry,
		 utd_entry->num_active_tags);

	spin_lock_bh(&sock_tag_list_lock);
	spin_lock_bh(&uid_tag_data_tree_lock);

	/* Drop every sock_tag still owned by this process. */
	list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) {
		st_entry = list_entry(entry, struct sock_tag, list);
		DR_DEBUG("qtaguid: %s(): "
			 "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n",
			 __func__,
			 st_entry, st_entry->sk,
			 current->pid, current->tgid,
			 pqd_entry->parent_tag_data->uid);

		/* Re-look up the uid data for this tag's uid. */
		utd_entry = uid_tag_data_tree_search(
			&uid_tag_data_tree,
			get_uid_from_tag(st_entry->tag));
		BUG_ON(IS_ERR_OR_NULL(utd_entry));
		DR_DEBUG("qtaguid: %s(): "
			 "looking for tag=0x%llx in utd_entry=%p\n", __func__,
			 st_entry->tag, utd_entry);
		tr = tag_ref_tree_search(&utd_entry->tag_ref_tree,
					 st_entry->tag);
		BUG_ON(!tr);
		BUG_ON(tr->num_sock_tags <= 0);
		tr->num_sock_tags--;
		free_tag_ref_from_utd_entry(tr, utd_entry);

		rb_erase(&st_entry->sock_node, &sock_tag_tree);
		list_del(&st_entry->list);
		/* Can't sockfd_put() within spinlock, do it later. */
		sock_tag_tree_insert(st_entry, &st_to_free_tree);

		/*
		 * Try to free the utd_entry if no other proc_qtu_data is
		 * using it (num_pqd is 0) and it doesn't have active tags
		 * (num_active_tags is 0).
		 */
		put_utd_entry(utd_entry);
	}

	rb_erase(&pqd_entry->node, &proc_qtu_data_tree);
	BUG_ON(pqd_entry->parent_tag_data->num_pqd < 1);
	pqd_entry->parent_tag_data->num_pqd--;
	put_utd_entry(pqd_entry->parent_tag_data);
	kfree(pqd_entry);
	file->private_data = NULL;

	spin_unlock_bh(&uid_tag_data_tree_lock);
	spin_unlock_bh(&sock_tag_list_lock);


	/* Outside the spinlocks: this path sockfd_put()s the sockets. */
	sock_tag_tree_erase(&st_to_free_tree);

	prdebug_full_state(0, "%s(): pid=%u tgid=%u", __func__,
			   current->pid, current->tgid);
	return 0;
}
2874
2875 /*------------------------------------------*/
/* File operations for the /dev/xt_qtaguid misc device. */
static const struct file_operations qtudev_fops = {
	.owner = THIS_MODULE,
	.open = qtudev_open,
	.release = qtudev_release,
};
2881
/* The /dev/xt_qtaguid misc device, registered in qtaguid_mt_init(). */
static struct miscdevice qtu_device = {
	.minor = MISC_DYNAMIC_MINOR,
	.name = QTU_DEV_NAME,
	.fops = &qtudev_fops,
	/* How sad it doesn't allow for defaults: .mode = S_IRUGO | S_IWUSR */
};
2888
/* seq_file iterator callbacks for /proc/.../xt_qtaguid/ctrl. */
static const struct seq_operations proc_qtaguid_ctrl_seqops = {
	.start = qtaguid_ctrl_proc_start,
	.next = qtaguid_ctrl_proc_next,
	.stop = qtaguid_ctrl_proc_stop,
	.show = qtaguid_ctrl_proc_show,
};
2895
/*
 * Open the ctrl file: allocate the per-reader iterator state together
 * with the seq_file so seq_release_private() frees it on close.
 */
static int proc_qtaguid_ctrl_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &proc_qtaguid_ctrl_seqops,
				sizeof(struct proc_ctrl_print_info));
}
2901
/* /proc/.../xt_qtaguid/ctrl: seq_file read, custom command write. */
static const struct file_operations proc_qtaguid_ctrl_fops = {
	.open           = proc_qtaguid_ctrl_open,
	.read           = seq_read,
	.write          = qtaguid_ctrl_proc_write,
	.llseek         = seq_lseek,
	.release        = seq_release_private,
};
2909
/* seq_file iterator callbacks for /proc/.../xt_qtaguid/stats. */
static const struct seq_operations proc_qtaguid_stats_seqops = {
	.start = qtaguid_stats_proc_start,
	.next = qtaguid_stats_proc_next,
	.stop = qtaguid_stats_proc_stop,
	.show = qtaguid_stats_proc_show,
};
2916
/*
 * Open the stats file: allocate struct proc_print_info together with
 * the seq_file so seq_release_private() frees it on close.
 */
static int proc_qtaguid_stats_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &proc_qtaguid_stats_seqops,
				sizeof(struct proc_print_info));
}
2922
/* /proc/.../xt_qtaguid/stats: read-only seq_file. */
static const struct file_operations proc_qtaguid_stats_fops = {
	.open           = proc_qtaguid_stats_open,
	.read           = seq_read,
	.llseek         = seq_lseek,
	.release        = seq_release_private,
};
2929
2930 /*------------------------------------------*/
2931 static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir)
2932 {
2933         int ret;
2934         *res_procdir = proc_mkdir(module_procdirname, init_net.proc_net);
2935         if (!*res_procdir) {
2936                 pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n");
2937                 ret = -ENOMEM;
2938                 goto no_dir;
2939         }
2940
2941         xt_qtaguid_ctrl_file = proc_create_data("ctrl", proc_ctrl_perms,
2942                                                 *res_procdir,
2943                                                 &proc_qtaguid_ctrl_fops,
2944                                                 NULL);
2945         if (!xt_qtaguid_ctrl_file) {
2946                 pr_err("qtaguid: failed to create xt_qtaguid/ctrl "
2947                         " file\n");
2948                 ret = -ENOMEM;
2949                 goto no_ctrl_entry;
2950         }
2951
2952         xt_qtaguid_stats_file = proc_create_data("stats", proc_stats_perms,
2953                                                  *res_procdir,
2954                                                  &proc_qtaguid_stats_fops,
2955                                                  NULL);
2956         if (!xt_qtaguid_stats_file) {
2957                 pr_err("qtaguid: failed to create xt_qtaguid/stats "
2958                         "file\n");
2959                 ret = -ENOMEM;
2960                 goto no_stats_entry;
2961         }
2962         /*
2963          * TODO: add support counter hacking
2964          * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write;
2965          */
2966         return 0;
2967
2968 no_stats_entry:
2969         remove_proc_entry("ctrl", *res_procdir);
2970 no_ctrl_entry:
2971         remove_proc_entry("xt_qtaguid", NULL);
2972 no_dir:
2973         return ret;
2974 }
2975
static struct xt_match qtaguid_mt_reg __read_mostly = {
	/*
	 * This module masquerades as the "owner" module so that iptables
	 * tools can deal with it.
	 */
	.name       = "owner",
	.revision   = 1,
	.family     = NFPROTO_UNSPEC,	/* registered for all families */
	.match      = qtaguid_mt,
	.matchsize  = sizeof(struct xt_qtaguid_match_info),
	.me         = THIS_MODULE,
};
2988
2989 static int __init qtaguid_mt_init(void)
2990 {
2991         if (qtaguid_proc_register(&xt_qtaguid_procdir)
2992             || iface_stat_init(xt_qtaguid_procdir)
2993             || xt_register_match(&qtaguid_mt_reg)
2994             || misc_register(&qtu_device))
2995                 return -1;
2996         return 0;
2997 }
2998
2999 /*
3000  * TODO: allow unloading of the module.
3001  * For now stats are permanent.
3002  * Kconfig forces'y/n' and never an 'm'.
3003  */
3004
module_init(qtaguid_mt_init);
MODULE_AUTHOR("jpa <jpa@google.com>");
MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats");
MODULE_LICENSE("GPL");
/* Load on demand under the legacy owner/qtaguid match names, v4 and v6. */
MODULE_ALIAS("ipt_owner");
MODULE_ALIAS("ip6t_owner");
MODULE_ALIAS("ipt_qtaguid");
MODULE_ALIAS("ip6t_qtaguid");