kuid/kgid: fix build error when trying Docker
[linux-3.10.git] net/netfilter/xt_qtaguid.c
1 /*
2  * Kernel iptables module to track stats for packets based on user tags.
3  *
4  * (C) 2011 Google, Inc
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10
11 /*
12  * There are run-time debug flags enabled via the debug_mask module param, or
13  * via the DEFAULT_DEBUG_MASK. See xt_qtaguid_internal.h.
14  */
15 #define DEBUG
16
17 #include <linux/file.h>
18 #include <linux/inetdevice.h>
19 #include <linux/module.h>
20 #include <linux/netfilter/x_tables.h>
21 #include <linux/netfilter/xt_qtaguid.h>
22 #include <linux/ratelimit.h>
23 #include <linux/seq_file.h>
24 #include <linux/skbuff.h>
25 #include <linux/workqueue.h>
26 #include <net/addrconf.h>
27 #include <net/sock.h>
28 #include <net/tcp.h>
29 #include <net/udp.h>
30
31 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
32 #include <linux/netfilter_ipv6/ip6_tables.h>
33 #endif
34
35 #include <linux/netfilter/xt_socket.h>
36 #include "xt_qtaguid_internal.h"
37 #include "xt_qtaguid_print.h"
38 #include "../../fs/proc/internal.h"
39
40 /*
41  * We only use the xt_socket funcs within a similar context to avoid unexpected
42  * return values.
43  */
44 #define XT_SOCKET_SUPPORTED_HOOKS \
45         ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN))
46
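For illustration, a minimal sketch of how a hook bitmask like this is normally consulted from a match function; hook_is_supported() is a hypothetical name, but par->hooknum is the standard struct xt_action_param field:

/*
 * Sketch only: gate any xt_socket lookup on the hooks listed in
 * XT_SOCKET_SUPPORTED_HOOKS.
 */
static bool hook_is_supported(const struct xt_action_param *par)
{
        return !!((1 << par->hooknum) & XT_SOCKET_SUPPORTED_HOOKS);
}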
47
48 static const char *module_procdirname = "xt_qtaguid";
49 static struct proc_dir_entry *xt_qtaguid_procdir;
50
51 static unsigned int proc_iface_perms = S_IRUGO;
52 module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR);
53
54 static struct proc_dir_entry *xt_qtaguid_stats_file;
55 static unsigned int proc_stats_perms = S_IRUGO;
56 module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR);
57
58 static struct proc_dir_entry *xt_qtaguid_ctrl_file;
59
60 /* Everybody can write. But proc_ctrl_write_limited is true by default, which
61  * limits what can be controlled. See the can_*() functions.
62  */
63 static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO;
64 module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR);
65
66 /* Limited by default, so the gid of the ctrl and stats proc entries
67  * will limit what can be done. See the can_*() functions.
68  */
69 static bool proc_stats_readall_limited = true;
70 static bool proc_ctrl_write_limited = true;
71
72 module_param_named(stats_readall_limited, proc_stats_readall_limited, bool,
73                    S_IRUGO | S_IWUSR);
74 module_param_named(ctrl_write_limited, proc_ctrl_write_limited, bool,
75                    S_IRUGO | S_IWUSR);
76
77 /*
78  * Limit the number of active tags (via socket tags) for a given UID.
79  * Multiple processes could share the UID.
80  */
81 static int max_sock_tags = DEFAULT_MAX_SOCK_TAGS;
82 module_param(max_sock_tags, int, S_IRUGO | S_IWUSR);
83
84 /*
85  * After the kernel has initialized this module, it is still possible
86  * to make it passive.
87  * Setting passive to Y:
88  *  - the iface stats handling will not act on notifications.
89  *  - iptables matches will never match.
90  *  - ctrl commands silently succeed.
91  *  - stats are always empty.
92  * This is mostly useful when a bug is suspected.
93  */
94 static bool module_passive;
95 module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR);
96
97 /*
98  * Control how qtaguid data is tracked per proc/uid.
99  * Setting tag_tracking_passive to Y:
100  *  - don't create proc specific structs to track tags
101  *  - don't check that active tag stats exceed some limits.
102  *  - don't clean up socket tags on process exits.
103  * This is mostly useful when a bug is suspected.
104  */
105 static bool qtu_proc_handling_passive;
106 module_param_named(tag_tracking_passive, qtu_proc_handling_passive, bool,
107                    S_IRUGO | S_IWUSR);
108
109 #define QTU_DEV_NAME "xt_qtaguid"
110
111 uint qtaguid_debug_mask = DEFAULT_DEBUG_MASK;
112 module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR);
113
114 /*---------------------------------------------------------------------------*/
115 static const char *iface_stat_procdirname = "iface_stat";
116 static struct proc_dir_entry *iface_stat_procdir;
117 /*
118  * The iface_stat_all* will go away once userspace gets used to the new fields
119  * that have a format line.
120  */
121 static const char *iface_stat_all_procfilename = "iface_stat_all";
122 static struct proc_dir_entry *iface_stat_all_procfile;
123 static const char *iface_stat_fmt_procfilename = "iface_stat_fmt";
124 static struct proc_dir_entry *iface_stat_fmt_procfile;
125
126
127 static LIST_HEAD(iface_stat_list);
128 static DEFINE_SPINLOCK(iface_stat_list_lock);
129
130 static struct rb_root sock_tag_tree = RB_ROOT;
131 static DEFINE_SPINLOCK(sock_tag_list_lock);
132
133 static struct rb_root tag_counter_set_tree = RB_ROOT;
134 static DEFINE_SPINLOCK(tag_counter_set_list_lock);
135
136 static struct rb_root uid_tag_data_tree = RB_ROOT;
137 static DEFINE_SPINLOCK(uid_tag_data_tree_lock);
138
139 static struct rb_root proc_qtu_data_tree = RB_ROOT;
140 /* No proc_qtu_data_tree_lock; use uid_tag_data_tree_lock */
141
142 static struct qtaguid_event_counts qtu_events;
143 /*----------------------------------------------*/
144 static bool can_manipulate_uids(void)
145 {
146         /* root pwnd */
147         return in_egroup_p(xt_qtaguid_ctrl_file->gid)
148                 || unlikely(!__kuid_val(current_fsuid()))
149                 || unlikely(!proc_ctrl_write_limited)
150                 || unlikely(uid_eq(current_fsuid(), xt_qtaguid_ctrl_file->uid));
151 }
152
153 static bool can_impersonate_uid(uid_t uid)
154 {
155         return uid ==  __kuid_val(current_fsuid()) || can_manipulate_uids();
156 }
157
158 static bool can_read_other_uid_stats(uid_t uid)
159 {
160         /* root pwnd */
161         return in_egroup_p(xt_qtaguid_stats_file->gid)
162                 || unlikely(!__kuid_val(current_fsuid()))
163                 || uid == __kuid_val(current_fsuid())
164                 || unlikely(!proc_stats_readall_limited)
165                 || unlikely(uid_eq(current_fsuid(), xt_qtaguid_ctrl_file->uid));
166 }
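The __kuid_val() and uid_eq() calls above are the substance of the "kuid/kgid" build fix named in the page title: with the kernel options Docker setups typically enable (user namespaces and the strict kuid/kgid type checks), current_fsuid() returns a distinct kuid_t type rather than a plain uid_t, so raw integer comparisons no longer compile. A minimal sketch of the conversion helpers, with fsuid_matches() and some_uid as hypothetical names:

#include <linux/cred.h>         /* current_fsuid() */
#include <linux/uidgid.h>       /* kuid_t, uid_eq(), __kuid_val(), make_kuid() */

static bool fsuid_matches(uid_t some_uid)
{
        kuid_t kuid = make_kuid(&init_user_ns, some_uid);  /* uid_t -> kuid_t */

        /* Compare kuid_t values with uid_eq(), never with ==. */
        return uid_eq(current_fsuid(), kuid);
        /* __kuid_val(kuid) converts the other way: kuid_t -> raw uid_t. */
}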
167
168 static inline void dc_add_byte_packets(struct data_counters *counters, int set,
169                                   enum ifs_tx_rx direction,
170                                   enum ifs_proto ifs_proto,
171                                   int bytes,
172                                   int packets)
173 {
174         counters->bpc[set][direction][ifs_proto].bytes += bytes;
175         counters->bpc[set][direction][ifs_proto].packets += packets;
176 }
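The bpc array is indexed as [counter set][direction][protocol]. The dc_sum_bytes()/dc_sum_packets() helpers used further down are defined elsewhere in the file; presumably they just total the three protocol buckets, roughly as in this sketch (sum_dir_bytes() is a hypothetical name):

/* Sketch of a per-direction byte total across the protocol buckets. */
static uint64_t sum_dir_bytes(const struct data_counters *counters,
                              int set, enum ifs_tx_rx direction)
{
        return counters->bpc[set][direction][IFS_TCP].bytes
                + counters->bpc[set][direction][IFS_UDP].bytes
                + counters->bpc[set][direction][IFS_PROTO_OTHER].bytes;
}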
177
178 static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag)
179 {
180         struct rb_node *node = root->rb_node;
181
182         while (node) {
183                 struct tag_node *data = rb_entry(node, struct tag_node, node);
184                 int result;
185                 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
186                          " node=%p data=%p\n", tag, node, data);
187                 result = tag_compare(tag, data->tag);
188                 RB_DEBUG("qtaguid: tag_node_tree_search(0x%llx): "
189                          " data.tag=0x%llx (uid=%u) res=%d\n",
190                          tag, data->tag, get_uid_from_tag(data->tag), result);
191                 if (result < 0)
192                         node = node->rb_left;
193                 else if (result > 0)
194                         node = node->rb_right;
195                 else
196                         return data;
197         }
198         return NULL;
199 }
200
201 static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root)
202 {
203         struct rb_node **new = &(root->rb_node), *parent = NULL;
204
205         /* Figure out where to put new node */
206         while (*new) {
207                 struct tag_node *this = rb_entry(*new, struct tag_node,
208                                                  node);
209                 int result = tag_compare(data->tag, this->tag);
210                 RB_DEBUG("qtaguid: %s(): tag=0x%llx"
211                          " (uid=%u)\n", __func__,
212                          this->tag,
213                          get_uid_from_tag(this->tag));
214                 parent = *new;
215                 if (result < 0)
216                         new = &((*new)->rb_left);
217                 else if (result > 0)
218                         new = &((*new)->rb_right);
219                 else
220                         BUG();
221         }
222
223         /* Add new node and rebalance tree. */
224         rb_link_node(&data->node, parent, new);
225         rb_insert_color(&data->node, root);
226 }
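tag_node_tree_search() and tag_node_tree_insert() are meant to be used together in a lookup-or-create pattern under the lock that guards the tree in question; get_tag_ref() further down does exactly this for tag refs. A condensed sketch (find_or_add_tag_node() is a hypothetical name, and the lock shown is only appropriate for trees guarded by uid_tag_data_tree_lock):

/* Sketch: look a tag up, or allocate and insert it, while holding the lock. */
static struct tag_node *find_or_add_tag_node(struct rb_root *root, tag_t tag)
{
        struct tag_node *node;

        spin_lock_bh(&uid_tag_data_tree_lock);
        node = tag_node_tree_search(root, tag);
        if (!node) {
                node = kzalloc(sizeof(*node), GFP_ATOMIC);  /* lock held */
                if (node) {
                        node->tag = tag;
                        tag_node_tree_insert(node, root);
                }
        }
        spin_unlock_bh(&uid_tag_data_tree_lock);
        return node;
}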
227
228 static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root)
229 {
230         tag_node_tree_insert(&data->tn, root);
231 }
232
233 static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag)
234 {
235         struct tag_node *node = tag_node_tree_search(root, tag);
236         if (!node)
237                 return NULL;
238         return rb_entry(&node->node, struct tag_stat, tn.node);
239 }
240
241 static void tag_counter_set_tree_insert(struct tag_counter_set *data,
242                                         struct rb_root *root)
243 {
244         tag_node_tree_insert(&data->tn, root);
245 }
246
247 static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root,
248                                                            tag_t tag)
249 {
250         struct tag_node *node = tag_node_tree_search(root, tag);
251         if (!node)
252                 return NULL;
253         return rb_entry(&node->node, struct tag_counter_set, tn.node);
254
255 }
256
257 static void tag_ref_tree_insert(struct tag_ref *data, struct rb_root *root)
258 {
259         tag_node_tree_insert(&data->tn, root);
260 }
261
262 static struct tag_ref *tag_ref_tree_search(struct rb_root *root, tag_t tag)
263 {
264         struct tag_node *node = tag_node_tree_search(root, tag);
265         if (!node)
266                 return NULL;
267         return rb_entry(&node->node, struct tag_ref, tn.node);
268 }
269
270 static struct sock_tag *sock_tag_tree_search(struct rb_root *root,
271                                              const struct sock *sk)
272 {
273         struct rb_node *node = root->rb_node;
274
275         while (node) {
276                 struct sock_tag *data = rb_entry(node, struct sock_tag,
277                                                  sock_node);
278                 if (sk < data->sk)
279                         node = node->rb_left;
280                 else if (sk > data->sk)
281                         node = node->rb_right;
282                 else
283                         return data;
284         }
285         return NULL;
286 }
287
288 static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root)
289 {
290         struct rb_node **new = &(root->rb_node), *parent = NULL;
291
292         /* Figure out where to put new node */
293         while (*new) {
294                 struct sock_tag *this = rb_entry(*new, struct sock_tag,
295                                                  sock_node);
296                 parent = *new;
297                 if (data->sk < this->sk)
298                         new = &((*new)->rb_left);
299                 else if (data->sk > this->sk)
300                         new = &((*new)->rb_right);
301                 else
302                         BUG();
303         }
304
305         /* Add new node and rebalance tree. */
306         rb_link_node(&data->sock_node, parent, new);
307         rb_insert_color(&data->sock_node, root);
308 }
309
310 static void sock_tag_tree_erase(struct rb_root *st_to_free_tree)
311 {
312         struct rb_node *node;
313         struct sock_tag *st_entry;
314
315         node = rb_first(st_to_free_tree);
316         while (node) {
317                 st_entry = rb_entry(node, struct sock_tag, sock_node);
318                 node = rb_next(node);
319                 CT_DEBUG("qtaguid: %s(): "
320                          "erase st: sk=%p tag=0x%llx (uid=%u)\n", __func__,
321                          st_entry->sk,
322                          st_entry->tag,
323                          get_uid_from_tag(st_entry->tag));
324                 rb_erase(&st_entry->sock_node, st_to_free_tree);
325                 sockfd_put(st_entry->socket);
326                 kfree(st_entry);
327         }
328 }
329
330 static struct proc_qtu_data *proc_qtu_data_tree_search(struct rb_root *root,
331                                                        const pid_t pid)
332 {
333         struct rb_node *node = root->rb_node;
334
335         while (node) {
336                 struct proc_qtu_data *data = rb_entry(node,
337                                                       struct proc_qtu_data,
338                                                       node);
339                 if (pid < data->pid)
340                         node = node->rb_left;
341                 else if (pid > data->pid)
342                         node = node->rb_right;
343                 else
344                         return data;
345         }
346         return NULL;
347 }
348
349 static void proc_qtu_data_tree_insert(struct proc_qtu_data *data,
350                                       struct rb_root *root)
351 {
352         struct rb_node **new = &(root->rb_node), *parent = NULL;
353
354         /* Figure out where to put new node */
355         while (*new) {
356                 struct proc_qtu_data *this = rb_entry(*new,
357                                                       struct proc_qtu_data,
358                                                       node);
359                 parent = *new;
360                 if (data->pid < this->pid)
361                         new = &((*new)->rb_left);
362                 else if (data->pid > this->pid)
363                         new = &((*new)->rb_right);
364                 else
365                         BUG();
366         }
367
368         /* Add new node and rebalance tree. */
369         rb_link_node(&data->node, parent, new);
370         rb_insert_color(&data->node, root);
371 }
372
373 static void uid_tag_data_tree_insert(struct uid_tag_data *data,
374                                      struct rb_root *root)
375 {
376         struct rb_node **new = &(root->rb_node), *parent = NULL;
377
378         /* Figure out where to put new node */
379         while (*new) {
380                 struct uid_tag_data *this = rb_entry(*new,
381                                                      struct uid_tag_data,
382                                                      node);
383                 parent = *new;
384                 if (data->uid < this->uid)
385                         new = &((*new)->rb_left);
386                 else if (data->uid > this->uid)
387                         new = &((*new)->rb_right);
388                 else
389                         BUG();
390         }
391
392         /* Add new node and rebalance tree. */
393         rb_link_node(&data->node, parent, new);
394         rb_insert_color(&data->node, root);
395 }
396
397 static struct uid_tag_data *uid_tag_data_tree_search(struct rb_root *root,
398                                                      uid_t uid)
399 {
400         struct rb_node *node = root->rb_node;
401
402         while (node) {
403                 struct uid_tag_data *data = rb_entry(node,
404                                                      struct uid_tag_data,
405                                                      node);
406                 if (uid < data->uid)
407                         node = node->rb_left;
408                 else if (uid > data->uid)
409                         node = node->rb_right;
410                 else
411                         return data;
412         }
413         return NULL;
414 }
415
416 /*
417  * Allocates a new uid_tag_data struct if needed.
418  * Returns a pointer to the found or allocated uid_tag_data.
419  * Returns a PTR_ERR on failures, and lock is not held.
420  * If found_res is not NULL:
421  *   sets *found_res to true if an existing entry was found (not allocated).
422  *   sets *found_res to false if a new entry had to be allocated.
423  */
424 struct uid_tag_data *get_uid_data(uid_t uid, bool *found_res)
425 {
426         struct uid_tag_data *utd_entry;
427
428         /* Look for top level uid_tag_data for the UID */
429         utd_entry = uid_tag_data_tree_search(&uid_tag_data_tree, uid);
430         DR_DEBUG("qtaguid: get_uid_data(%u) utd=%p\n", uid, utd_entry);
431
432         if (found_res)
433                 *found_res = utd_entry;
434         if (utd_entry)
435                 return utd_entry;
436
437         utd_entry = kzalloc(sizeof(*utd_entry), GFP_ATOMIC);
438         if (!utd_entry) {
439                 pr_err("qtaguid: get_uid_data(%u): "
440                        "tag data alloc failed\n", uid);
441                 return ERR_PTR(-ENOMEM);
442         }
443
444         utd_entry->uid = uid;
445         utd_entry->tag_ref_tree = RB_ROOT;
446         uid_tag_data_tree_insert(utd_entry, &uid_tag_data_tree);
447         DR_DEBUG("qtaguid: get_uid_data(%u) new utd=%p\n", uid, utd_entry);
448         return utd_entry;
449 }
450
451 /* Never returns NULL. Either PTR_ERR or a valid ptr. */
452 static struct tag_ref *new_tag_ref(tag_t new_tag,
453                                    struct uid_tag_data *utd_entry)
454 {
455         struct tag_ref *tr_entry;
456         int res;
457
458         if (utd_entry->num_active_tags + 1 > max_sock_tags) {
459                 pr_info("qtaguid: new_tag_ref(0x%llx): "
460                         "tag ref alloc quota exceeded. max=%d\n",
461                         new_tag, max_sock_tags);
462                 res = -EMFILE;
463                 goto err_res;
464
465         }
466
467         tr_entry = kzalloc(sizeof(*tr_entry), GFP_ATOMIC);
468         if (!tr_entry) {
469                 pr_err("qtaguid: new_tag_ref(0x%llx): "
470                        "tag ref alloc failed\n",
471                        new_tag);
472                 res = -ENOMEM;
473                 goto err_res;
474         }
475         tr_entry->tn.tag = new_tag;
476         /* tr_entry->num_sock_tags  handled by caller */
477         utd_entry->num_active_tags++;
478         tag_ref_tree_insert(tr_entry, &utd_entry->tag_ref_tree);
479         DR_DEBUG("qtaguid: new_tag_ref(0x%llx): "
480                  " inserted new tag ref %p\n",
481                  new_tag, tr_entry);
482         return tr_entry;
483
484 err_res:
485         return ERR_PTR(res);
486 }
487
488 static struct tag_ref *lookup_tag_ref(tag_t full_tag,
489                                       struct uid_tag_data **utd_res)
490 {
491         struct uid_tag_data *utd_entry;
492         struct tag_ref *tr_entry;
493         bool found_utd;
494         uid_t uid = get_uid_from_tag(full_tag);
495
496         DR_DEBUG("qtaguid: lookup_tag_ref(tag=0x%llx (uid=%u))\n",
497                  full_tag, uid);
498
499         utd_entry = get_uid_data(uid, &found_utd);
500         if (IS_ERR_OR_NULL(utd_entry)) {
501                 if (utd_res)
502                         *utd_res = utd_entry;
503                 return NULL;
504         }
505
506         tr_entry = tag_ref_tree_search(&utd_entry->tag_ref_tree, full_tag);
507         if (utd_res)
508                 *utd_res = utd_entry;
509         DR_DEBUG("qtaguid: lookup_tag_ref(0x%llx) utd_entry=%p tr_entry=%p\n",
510                  full_tag, utd_entry, tr_entry);
511         return tr_entry;
512 }
513
514 /* Never returns NULL. Either PTR_ERR or a valid ptr. */
515 static struct tag_ref *get_tag_ref(tag_t full_tag,
516                                    struct uid_tag_data **utd_res)
517 {
518         struct uid_tag_data *utd_entry;
519         struct tag_ref *tr_entry;
520
521         DR_DEBUG("qtaguid: get_tag_ref(0x%llx)\n",
522                  full_tag);
523         spin_lock_bh(&uid_tag_data_tree_lock);
524         tr_entry = lookup_tag_ref(full_tag, &utd_entry);
525         BUG_ON(IS_ERR_OR_NULL(utd_entry));
526         if (!tr_entry)
527                 tr_entry = new_tag_ref(full_tag, utd_entry);
528
529         spin_unlock_bh(&uid_tag_data_tree_lock);
530         if (utd_res)
531                 *utd_res = utd_entry;
532         DR_DEBUG("qtaguid: get_tag_ref(0x%llx) utd=%p tr=%p\n",
533                  full_tag, utd_entry, tr_entry);
534         return tr_entry;
535 }
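Because get_tag_ref() never returns NULL, its callers (the ctrl command handlers, which are outside this excerpt) presumably separate success from failure with IS_ERR(); a sketch of that calling convention, with tag_one_socket() as a hypothetical caller:

/* Sketch of the expected calling convention for get_tag_ref(). */
static int tag_one_socket(tag_t full_tag)
{
        struct uid_tag_data *utd_entry;
        struct tag_ref *tr_entry = get_tag_ref(full_tag, &utd_entry);

        if (IS_ERR(tr_entry))
                return PTR_ERR(tr_entry);  /* e.g. -ENOMEM or -EMFILE */
        tr_entry->num_sock_tags++;         /* per the comment in new_tag_ref() */
        return 0;
}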
536
537 /* Checks and maybe frees the UID Tag Data entry */
538 static void put_utd_entry(struct uid_tag_data *utd_entry)
539 {
540         /* Are we done with the UID tag data entry? */
541         if (RB_EMPTY_ROOT(&utd_entry->tag_ref_tree) &&
542                 !utd_entry->num_pqd) {
543                 DR_DEBUG("qtaguid: %s(): "
544                          "erase utd_entry=%p uid=%u "
545                          "by pid=%u tgid=%u uid=%u\n", __func__,
546                          utd_entry, utd_entry->uid,
547                          current->pid, current->tgid,
548                          __kuid_val(current_fsuid()));
549                 BUG_ON(utd_entry->num_active_tags);
550                 rb_erase(&utd_entry->node, &uid_tag_data_tree);
551                 kfree(utd_entry);
552         } else {
553                 DR_DEBUG("qtaguid: %s(): "
554                          "utd_entry=%p still has %d tags %d proc_qtu_data\n",
555                          __func__, utd_entry, utd_entry->num_active_tags,
556                          utd_entry->num_pqd);
557                 BUG_ON(!(utd_entry->num_active_tags ||
558                          utd_entry->num_pqd));
559         }
560 }
561
562 /*
563  * If no sock_tags are using this tag_ref,
564  * decrements refcount of utd_entry, removes tr_entry
565  * from utd_entry->tag_ref_tree and frees.
566  */
567 static void free_tag_ref_from_utd_entry(struct tag_ref *tr_entry,
568                                         struct uid_tag_data *utd_entry)
569 {
570         DR_DEBUG("qtaguid: %s(): %p tag=0x%llx (uid=%u)\n", __func__,
571                  tr_entry, tr_entry->tn.tag,
572                  get_uid_from_tag(tr_entry->tn.tag));
573         if (!tr_entry->num_sock_tags) {
574                 BUG_ON(!utd_entry->num_active_tags);
575                 utd_entry->num_active_tags--;
576                 rb_erase(&tr_entry->tn.node, &utd_entry->tag_ref_tree);
577                 DR_DEBUG("qtaguid: %s(): erased %p\n", __func__, tr_entry);
578                 kfree(tr_entry);
579         }
580 }
581
582 static void put_tag_ref_tree(tag_t full_tag, struct uid_tag_data *utd_entry)
583 {
584         struct rb_node *node;
585         struct tag_ref *tr_entry;
586         tag_t acct_tag;
587
588         DR_DEBUG("qtaguid: %s(tag=0x%llx (uid=%u))\n", __func__,
589                  full_tag, get_uid_from_tag(full_tag));
590         acct_tag = get_atag_from_tag(full_tag);
591         node = rb_first(&utd_entry->tag_ref_tree);
592         while (node) {
593                 tr_entry = rb_entry(node, struct tag_ref, tn.node);
594                 node = rb_next(node);
595                 if (!acct_tag || tr_entry->tn.tag == full_tag)
596                         free_tag_ref_from_utd_entry(tr_entry, utd_entry);
597         }
598 }
599
600 static ssize_t read_proc_u64(struct file *file, char __user *buf,
601                          size_t size, loff_t *ppos)
602 {
603         uint64_t *valuep = PDE_DATA(file_inode(file));
604         char tmp[24];
605         size_t tmp_size;
606
607         tmp_size = scnprintf(tmp, sizeof(tmp), "%llu\n", *valuep);
608         return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size);
609 }
610
611 static ssize_t read_proc_bool(struct file *file, char __user *buf,
612                           size_t size, loff_t *ppos)
613 {
614         bool *valuep = PDE_DATA(file_inode(file));
615         char tmp[24];
616         size_t tmp_size;
617
618         tmp_size = scnprintf(tmp, sizeof(tmp), "%u\n", *valuep);
619         return simple_read_from_buffer(buf, size, ppos, tmp, tmp_size);
620 }
621
622 static int get_active_counter_set(tag_t tag)
623 {
624         int active_set = 0;
625         struct tag_counter_set *tcs;
626
627         MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)"
628                  " (uid=%u)\n",
629                  tag, get_uid_from_tag(tag));
630         /* For now we only handle UID tags for active sets */
631         tag = get_utag_from_tag(tag);
632         spin_lock_bh(&tag_counter_set_list_lock);
633         tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
634         if (tcs)
635                 active_set = tcs->active_set;
636         spin_unlock_bh(&tag_counter_set_list_lock);
637         return active_set;
638 }
639
640 /*
641  * Find the entry for tracking the specified interface.
642  * Caller must hold iface_stat_list_lock
643  */
644 static struct iface_stat *get_iface_entry(const char *ifname)
645 {
646         struct iface_stat *iface_entry;
647
648         /* Find the entry for tracking the specified tag within the interface */
649         if (ifname == NULL) {
650                 pr_info("qtaguid: iface_stat: get() NULL device name\n");
651                 return NULL;
652         }
653
654         /* Iterate over interfaces */
655         list_for_each_entry(iface_entry, &iface_stat_list, list) {
656                 if (!strcmp(ifname, iface_entry->ifname))
657                         goto done;
658         }
659         iface_entry = NULL;
660 done:
661         return iface_entry;
662 }
663
664 /* This is for fmt2 only */
665 static void pp_iface_stat_header(struct seq_file *m)
666 {
667         seq_puts(m,
668                  "ifname "
669                  "total_skb_rx_bytes total_skb_rx_packets "
670                  "total_skb_tx_bytes total_skb_tx_packets "
671                  "rx_tcp_bytes rx_tcp_packets "
672                  "rx_udp_bytes rx_udp_packets "
673                  "rx_other_bytes rx_other_packets "
674                  "tx_tcp_bytes tx_tcp_packets "
675                  "tx_udp_bytes tx_udp_packets "
676                  "tx_other_bytes tx_other_packets\n"
677         );
678 }
679
680 static void pp_iface_stat_line(struct seq_file *m,
681                                struct iface_stat *iface_entry)
682 {
683         struct data_counters *cnts;
684         int cnt_set = 0;   /* We only use one set for the device */
685         cnts = &iface_entry->totals_via_skb;
686         seq_printf(m, "%s %llu %llu %llu %llu %llu %llu %llu %llu "
687                    "%llu %llu %llu %llu %llu %llu %llu %llu\n",
688                    iface_entry->ifname,
689                    dc_sum_bytes(cnts, cnt_set, IFS_RX),
690                    dc_sum_packets(cnts, cnt_set, IFS_RX),
691                    dc_sum_bytes(cnts, cnt_set, IFS_TX),
692                    dc_sum_packets(cnts, cnt_set, IFS_TX),
693                    cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
694                    cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
695                    cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
696                    cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
697                    cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
698                    cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
699                    cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
700                    cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
701                    cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
702                    cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
703                    cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
704                    cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
705 }
706
707 struct proc_iface_stat_fmt_info {
708         int fmt;
709 };
710
711 static void *iface_stat_fmt_proc_start(struct seq_file *m, loff_t *pos)
712 {
713         struct proc_iface_stat_fmt_info *p = m->private;
714         loff_t n = *pos;
715
716         /*
717          * This lock will prevent iface_stat_update() from changing active,
718          * and in turn prevent an interface from unregistering itself.
719          */
720         spin_lock_bh(&iface_stat_list_lock);
721
722         if (unlikely(module_passive))
723                 return NULL;
724
725         if (!n && p->fmt == 2)
726                 pp_iface_stat_header(m);
727
728         return seq_list_start(&iface_stat_list, n);
729 }
730
731 static void *iface_stat_fmt_proc_next(struct seq_file *m, void *p, loff_t *pos)
732 {
733         return seq_list_next(p, &iface_stat_list, pos);
734 }
735
736 static void iface_stat_fmt_proc_stop(struct seq_file *m, void *p)
737 {
738         spin_unlock_bh(&iface_stat_list_lock);
739 }
740
741 static int iface_stat_fmt_proc_show(struct seq_file *m, void *v)
742 {
743         struct proc_iface_stat_fmt_info *p = m->private;
744         struct iface_stat *iface_entry;
745         struct rtnl_link_stats64 dev_stats, *stats;
746         struct rtnl_link_stats64 no_dev_stats = {0};
747
748
749         CT_DEBUG("qtaguid:proc iface_stat_fmt pid=%u tgid=%u uid=%u\n",
750                  current->pid, current->tgid, __kuid_val(current_fsuid()));
751
752         iface_entry = list_entry(v, struct iface_stat, list);
753
754         if (iface_entry->active) {
755                 stats = dev_get_stats(iface_entry->net_dev,
756                                       &dev_stats);
757         } else {
758                 stats = &no_dev_stats;
759         }
760         /*
761          * If the meaning of the data changes, then update the fmtX
762          * string.
763          */
764         if (p->fmt == 1) {
765                 seq_printf(m, "%s %d %llu %llu %llu %llu %llu %llu %llu %llu\n",
766                            iface_entry->ifname,
767                            iface_entry->active,
768                            iface_entry->totals_via_dev[IFS_RX].bytes,
769                            iface_entry->totals_via_dev[IFS_RX].packets,
770                            iface_entry->totals_via_dev[IFS_TX].bytes,
771                            iface_entry->totals_via_dev[IFS_TX].packets,
772                            stats->rx_bytes, stats->rx_packets,
773                            stats->tx_bytes, stats->tx_packets
774                            );
775         } else {
776                 pp_iface_stat_line(m, iface_entry);
777         }
778         return 0;
779 }
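These four callbacks follow the standard seq_file iterator protocol; elsewhere in the file (outside this excerpt) they are presumably collected into a struct seq_operations and handed to the proc open handler via seq_open(). A minimal sketch of that wiring (the variable name is hypothetical):

/* Sketch: tying the start/next/stop/show callbacks together. */
static const struct seq_operations iface_stat_fmt_seq_ops = {
        .start  = iface_stat_fmt_proc_start,
        .next   = iface_stat_fmt_proc_next,
        .stop   = iface_stat_fmt_proc_stop,
        .show   = iface_stat_fmt_proc_show,
};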
780
781 static const struct file_operations read_u64_fops = {
782         .read           = read_proc_u64,
783         .llseek         = default_llseek,
784 };
785
786 static const struct file_operations read_bool_fops = {
787         .read           = read_proc_bool,
788         .llseek         = default_llseek,
789 };
790
791 static void iface_create_proc_worker(struct work_struct *work)
792 {
793         struct proc_dir_entry *proc_entry;
794         struct iface_stat_work *isw = container_of(work, struct iface_stat_work,
795                                                    iface_work);
796         struct iface_stat *new_iface  = isw->iface_entry;
797
798         /* iface_entries are not deleted, so safe to manipulate. */
799         proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir);
800         if (IS_ERR_OR_NULL(proc_entry)) {
801                 pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n");
802                 kfree(isw);
803                 return;
804         }
805
806         new_iface->proc_ptr = proc_entry;
807
808         proc_create_data("tx_bytes", proc_iface_perms, proc_entry,
809                          &read_u64_fops,
810                          &new_iface->totals_via_dev[IFS_TX].bytes);
811         proc_create_data("rx_bytes", proc_iface_perms, proc_entry,
812                          &read_u64_fops,
813                          &new_iface->totals_via_dev[IFS_RX].bytes);
814         proc_create_data("tx_packets", proc_iface_perms, proc_entry,
815                          &read_u64_fops,
816                          &new_iface->totals_via_dev[IFS_TX].packets);
817         proc_create_data("rx_packets", proc_iface_perms, proc_entry,
818                          &read_u64_fops,
819                          &new_iface->totals_via_dev[IFS_RX].packets);
820         proc_create_data("active", proc_iface_perms, proc_entry,
821                          &read_bool_fops, &new_iface->active);
822
823         IF_DEBUG("qtaguid: iface_stat: create_proc(): done "
824                  "entry=%p dev=%s\n", new_iface, new_iface->ifname);
825         kfree(isw);
826 }
827
828 /*
829  * Will set the entry's active state, and
830  * update its net_dev pointer accordingly.
831  */
832 static void _iface_stat_set_active(struct iface_stat *entry,
833                                    struct net_device *net_dev,
834                                    bool activate)
835 {
836         if (activate) {
837                 entry->net_dev = net_dev;
838                 entry->active = true;
839                 IF_DEBUG("qtaguid: %s(%s): "
840                          "enable tracking. rfcnt=%d\n", __func__,
841                          entry->ifname,
842                          __this_cpu_read(*net_dev->pcpu_refcnt));
843         } else {
844                 entry->active = false;
845                 entry->net_dev = NULL;
846                 IF_DEBUG("qtaguid: %s(%s): "
847                          "disable tracking. rfcnt=%d\n", __func__,
848                          entry->ifname,
849                          __this_cpu_read(*net_dev->pcpu_refcnt));
850
851         }
852 }
853
854 /* Caller must hold iface_stat_list_lock */
855 static struct iface_stat *iface_alloc(struct net_device *net_dev)
856 {
857         struct iface_stat *new_iface;
858         struct iface_stat_work *isw;
859
860         new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC);
861         if (new_iface == NULL) {
862                 pr_err("qtaguid: iface_stat: create(%s): "
863                        "iface_stat alloc failed\n", net_dev->name);
864                 return NULL;
865         }
866         new_iface->ifname = kstrdup(net_dev->name, GFP_ATOMIC);
867         if (new_iface->ifname == NULL) {
868                 pr_err("qtaguid: iface_stat: create(%s): "
869                        "ifname alloc failed\n", net_dev->name);
870                 kfree(new_iface);
871                 return NULL;
872         }
873         spin_lock_init(&new_iface->tag_stat_list_lock);
874         new_iface->tag_stat_tree = RB_ROOT;
875         _iface_stat_set_active(new_iface, net_dev, true);
876
877         /*
878          * ipv6 notifier chains are atomic :(
879          * No create_proc_read_entry() for you!
880          */
881         isw = kmalloc(sizeof(*isw), GFP_ATOMIC);
882         if (!isw) {
883                 pr_err("qtaguid: iface_stat: create(%s): "
884                        "work alloc failed\n", new_iface->ifname);
885                 _iface_stat_set_active(new_iface, net_dev, false);
886                 kfree(new_iface->ifname);
887                 kfree(new_iface);
888                 return NULL;
889         }
890         isw->iface_entry = new_iface;
891         INIT_WORK(&isw->iface_work, iface_create_proc_worker);
892         schedule_work(&isw->iface_work);
893         list_add(&new_iface->list, &iface_stat_list);
894         return new_iface;
895 }
896
897 static void iface_check_stats_reset_and_adjust(struct net_device *net_dev,
898                                                struct iface_stat *iface)
899 {
900         struct rtnl_link_stats64 dev_stats, *stats;
901         bool stats_rewound;
902
903         stats = dev_get_stats(net_dev, &dev_stats);
904         /* No empty packets */
905         stats_rewound =
906                 (stats->rx_bytes < iface->last_known[IFS_RX].bytes)
907                 || (stats->tx_bytes < iface->last_known[IFS_TX].bytes);
908
909         IF_DEBUG("qtaguid: %s(%s): iface=%p netdev=%p "
910                  "bytes rx/tx=%llu/%llu "
911                  "active=%d last_known=%d "
912                  "stats_rewound=%d\n", __func__,
913                  net_dev ? net_dev->name : "?",
914                  iface, net_dev,
915                  stats->rx_bytes, stats->tx_bytes,
916                  iface->active, iface->last_known_valid, stats_rewound);
917
918         if (iface->active && iface->last_known_valid && stats_rewound) {
919                 pr_warn_once("qtaguid: iface_stat: %s(%s): "
920                              "iface reset its stats unexpectedly\n", __func__,
921                              net_dev->name);
922
923                 iface->totals_via_dev[IFS_TX].bytes +=
924                         iface->last_known[IFS_TX].bytes;
925                 iface->totals_via_dev[IFS_TX].packets +=
926                         iface->last_known[IFS_TX].packets;
927                 iface->totals_via_dev[IFS_RX].bytes +=
928                         iface->last_known[IFS_RX].bytes;
929                 iface->totals_via_dev[IFS_RX].packets +=
930                         iface->last_known[IFS_RX].packets;
931                 iface->last_known_valid = false;
932                 IF_DEBUG("qtaguid: %s(%s): iface=%p "
933                          "used last known bytes rx/tx=%llu/%llu\n", __func__,
934                          iface->ifname, iface, iface->last_known[IFS_RX].bytes,
935                          iface->last_known[IFS_TX].bytes);
936         }
937 }
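A hypothetical walk-through of the adjustment above, with invented numbers:

/*
 * Illustration only: the device last reported tx_bytes=1000, which was
 * stashed in last_known[IFS_TX].bytes when the interface went down.  The
 * next time around, dev_get_stats() reports tx_bytes=50.  Since 50 < 1000,
 * stats_rewound is true, so the stashed 1000 bytes are folded into
 * totals_via_dev[IFS_TX].bytes rather than being lost, last_known_valid is
 * cleared, and accounting continues from the rewound counter.
 */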
938
939 /*
940  * Create a new entry for tracking the specified interface.
941  * Do nothing if the entry already exists.
942  * Called when an interface is configured with a valid IP address.
943  */
944 static void iface_stat_create(struct net_device *net_dev,
945                               struct in_ifaddr *ifa)
946 {
947         struct in_device *in_dev = NULL;
948         const char *ifname;
949         struct iface_stat *entry;
950         __be32 ipaddr = 0;
951         struct iface_stat *new_iface;
952
953         IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n",
954                  net_dev ? net_dev->name : "?",
955                  ifa, net_dev);
956         if (!net_dev) {
957                 pr_err("qtaguid: iface_stat: create(): no net dev\n");
958                 return;
959         }
960
961         ifname = net_dev->name;
962         if (!ifa) {
963                 in_dev = in_dev_get(net_dev);
964                 if (!in_dev) {
965                         pr_err("qtaguid: iface_stat: create(%s): no inet dev\n",
966                                ifname);
967                         return;
968                 }
969                 IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n",
970                          ifname, in_dev);
971                 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
972                         IF_DEBUG("qtaguid: iface_stat: create(%s): "
973                                  "ifa=%p ifa_label=%s\n",
974                                  ifname, ifa,
975                                  ifa->ifa_label ? ifa->ifa_label : "(null)");
976                         if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label))
977                                 break;
978                 }
979         }
980
981         if (!ifa) {
982                 IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n",
983                          ifname);
984                 goto done_put;
985         }
986         ipaddr = ifa->ifa_local;
987
988         spin_lock_bh(&iface_stat_list_lock);
989         entry = get_iface_entry(ifname);
990         if (entry != NULL) {
991                 IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n",
992                          ifname, entry);
993                 iface_check_stats_reset_and_adjust(net_dev, entry);
994                 _iface_stat_set_active(entry, net_dev, true);
995                 IF_DEBUG("qtaguid: %s(%s): "
996                          "tracking now %d on ip=%pI4\n", __func__,
997                          entry->ifname, true, &ipaddr);
998                 goto done_unlock_put;
999         }
1000
1001         new_iface = iface_alloc(net_dev);
1002         IF_DEBUG("qtaguid: iface_stat: create(%s): done "
1003                  "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr);
1004 done_unlock_put:
1005         spin_unlock_bh(&iface_stat_list_lock);
1006 done_put:
1007         if (in_dev)
1008                 in_dev_put(in_dev);
1009 }
1010
1011 static void iface_stat_create_ipv6(struct net_device *net_dev,
1012                                    struct inet6_ifaddr *ifa)
1013 {
1014         struct in_device *in_dev;
1015         const char *ifname;
1016         struct iface_stat *entry;
1017         struct iface_stat *new_iface;
1018         int addr_type;
1019
1020         IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n",
1021                  ifa, net_dev, net_dev ? net_dev->name : "");
1022         if (!net_dev) {
1023                 pr_err("qtaguid: iface_stat: create6(): no net dev!\n");
1024                 return;
1025         }
1026         ifname = net_dev->name;
1027
1028         in_dev = in_dev_get(net_dev);
1029         if (!in_dev) {
1030                 pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n",
1031                        ifname);
1032                 return;
1033         }
1034
1035         IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n",
1036                  ifname, in_dev);
1037
1038         if (!ifa) {
1039                 IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n",
1040                          ifname);
1041                 goto done_put;
1042         }
1043         addr_type = ipv6_addr_type(&ifa->addr);
1044
1045         spin_lock_bh(&iface_stat_list_lock);
1046         entry = get_iface_entry(ifname);
1047         if (entry != NULL) {
1048                 IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1049                          ifname, entry);
1050                 iface_check_stats_reset_and_adjust(net_dev, entry);
1051                 _iface_stat_set_active(entry, net_dev, true);
1052                 IF_DEBUG("qtaguid: %s(%s): "
1053                          "tracking now %d on ip=%pI6c\n", __func__,
1054                          entry->ifname, true, &ifa->addr);
1055                 goto done_unlock_put;
1056         }
1057
1058         new_iface = iface_alloc(net_dev);
1059         IF_DEBUG("qtaguid: iface_stat: create6(%s): done "
1060                  "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr);
1061
1062 done_unlock_put:
1063         spin_unlock_bh(&iface_stat_list_lock);
1064 done_put:
1065         in_dev_put(in_dev);
1066 }
1067
1068 static struct sock_tag *get_sock_stat_nl(const struct sock *sk)
1069 {
1070         MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk);
1071         return sock_tag_tree_search(&sock_tag_tree, sk);
1072 }
1073
1074 static struct sock_tag *get_sock_stat(const struct sock *sk)
1075 {
1076         struct sock_tag *sock_tag_entry;
1077         MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk);
1078         if (!sk)
1079                 return NULL;
1080         spin_lock_bh(&sock_tag_list_lock);
1081         sock_tag_entry = get_sock_stat_nl(sk);
1082         spin_unlock_bh(&sock_tag_list_lock);
1083         return sock_tag_entry;
1084 }
1085
1086 static int ipx_proto(const struct sk_buff *skb,
1087                      struct xt_action_param *par)
1088 {
1089         int thoff = 0, tproto;
1090
1091         switch (par->family) {
1092         case NFPROTO_IPV6:
1093                 tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
1094                 if (tproto < 0)
1095                         MT_DEBUG("%s(): transport header not found in ipv6"
1096                                  " skb=%p\n", __func__, skb);
1097                 break;
1098         case NFPROTO_IPV4:
1099                 tproto = ip_hdr(skb)->protocol;
1100                 break;
1101         default:
1102                 tproto = IPPROTO_RAW;
1103         }
1104         return tproto;
1105 }
1106
1107 static void
1108 data_counters_update(struct data_counters *dc, int set,
1109                      enum ifs_tx_rx direction, int proto, int bytes)
1110 {
1111         switch (proto) {
1112         case IPPROTO_TCP:
1113                 dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1);
1114                 break;
1115         case IPPROTO_UDP:
1116                 dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1);
1117                 break;
1118         case IPPROTO_IP:
1119         default:
1120                 dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes,
1121                                     1);
1122                 break;
1123         }
1124 }
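A note on how this pairs with ipx_proto() above:

/*
 * ipx_proto() can return a negative value when the IPv6 transport header
 * cannot be found; such a value reaches the default case here and is simply
 * accounted as IFS_PROTO_OTHER (see iface_stat_update_from_skb() below for
 * a call site that feeds ipx_proto()'s result straight into this function).
 */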
1125
1126 /*
1127  * Update stats for the specified interface. Do nothing if the entry
1128  * does not exist (when a device was never configured with an IP address).
1129  * Called when a device is being unregistered.
1130  */
1131 static void iface_stat_update(struct net_device *net_dev, bool stash_only)
1132 {
1133         struct rtnl_link_stats64 dev_stats, *stats;
1134         struct iface_stat *entry;
1135
1136         stats = dev_get_stats(net_dev, &dev_stats);
1137         spin_lock_bh(&iface_stat_list_lock);
1138         entry = get_iface_entry(net_dev->name);
1139         if (entry == NULL) {
1140                 IF_DEBUG("qtaguid: iface_stat: update(%s): not tracked\n",
1141                          net_dev->name);
1142                 spin_unlock_bh(&iface_stat_list_lock);
1143                 return;
1144         }
1145
1146         IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1147                  net_dev->name, entry);
1148         if (!entry->active) {
1149                 IF_DEBUG("qtaguid: %s(%s): already disabled\n", __func__,
1150                          net_dev->name);
1151                 spin_unlock_bh(&iface_stat_list_lock);
1152                 return;
1153         }
1154
1155         if (stash_only) {
1156                 entry->last_known[IFS_TX].bytes = stats->tx_bytes;
1157                 entry->last_known[IFS_TX].packets = stats->tx_packets;
1158                 entry->last_known[IFS_RX].bytes = stats->rx_bytes;
1159                 entry->last_known[IFS_RX].packets = stats->rx_packets;
1160                 entry->last_known_valid = true;
1161                 IF_DEBUG("qtaguid: %s(%s): "
1162                          "dev stats stashed rx/tx=%llu/%llu\n", __func__,
1163                          net_dev->name, stats->rx_bytes, stats->tx_bytes);
1164                 spin_unlock_bh(&iface_stat_list_lock);
1165                 return;
1166         }
1167         entry->totals_via_dev[IFS_TX].bytes += stats->tx_bytes;
1168         entry->totals_via_dev[IFS_TX].packets += stats->tx_packets;
1169         entry->totals_via_dev[IFS_RX].bytes += stats->rx_bytes;
1170         entry->totals_via_dev[IFS_RX].packets += stats->rx_packets;
1171         /* We don't need the last_known[] anymore */
1172         entry->last_known_valid = false;
1173         _iface_stat_set_active(entry, net_dev, false);
1174         IF_DEBUG("qtaguid: %s(%s): "
1175                  "disable tracking. rx/tx=%llu/%llu\n", __func__,
1176                  net_dev->name, stats->rx_bytes, stats->tx_bytes);
1177         spin_unlock_bh(&iface_stat_list_lock);
1178 }
1179
1180 /*
1181  * Update stats for the specified interface from the skb.
1182  * Do nothing if the entry
1183  * does not exist (when a device was never configured with an IP address).
1184  * Called on each sk.
1185  */
1186 static void iface_stat_update_from_skb(const struct sk_buff *skb,
1187                                        struct xt_action_param *par)
1188 {
1189         struct iface_stat *entry;
1190         const struct net_device *el_dev;
1191         enum ifs_tx_rx direction = par->in ? IFS_RX : IFS_TX;
1192         int bytes = skb->len;
1193         int proto;
1194
1195         if (!skb->dev) {
1196                 MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
1197                 el_dev = par->in ? : par->out;
1198         } else {
1199                 const struct net_device *other_dev;
1200                 el_dev = skb->dev;
1201                 other_dev = par->in ? : par->out;
1202                 if (el_dev != other_dev) {
1203                         MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
1204                                  "par->(in/out)=%p %s\n",
1205                                  par->hooknum, el_dev, el_dev->name, other_dev,
1206                                  other_dev->name);
1207                 }
1208         }
1209
1210         if (unlikely(!el_dev)) {
1211                 pr_err_ratelimited("qtaguid[%d]: %s(): no par->in/out?!!\n",
1212                                    par->hooknum, __func__);
1213                 BUG();
1214         } else if (unlikely(!el_dev->name)) {
1215                 pr_err_ratelimited("qtaguid[%d]: %s(): no dev->name?!!\n",
1216                                    par->hooknum, __func__);
1217                 BUG();
1218         } else {
1219                 proto = ipx_proto(skb, par);
1220                 MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
1221                          par->hooknum, el_dev->name, el_dev->type,
1222                          par->family, proto);
1223         }
1224
1225         spin_lock_bh(&iface_stat_list_lock);
1226         entry = get_iface_entry(el_dev->name);
1227         if (entry == NULL) {
1228                 IF_DEBUG("qtaguid: iface_stat: %s(%s): not tracked\n",
1229                          __func__, el_dev->name);
1230                 spin_unlock_bh(&iface_stat_list_lock);
1231                 return;
1232         }
1233
1234         IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
1235                  el_dev->name, entry);
1236
1237         data_counters_update(&entry->totals_via_skb, 0, direction, proto,
1238                              bytes);
1239         spin_unlock_bh(&iface_stat_list_lock);
1240 }
1241
1242 static void tag_stat_update(struct tag_stat *tag_entry,
1243                         enum ifs_tx_rx direction, int proto, int bytes)
1244 {
1245         int active_set;
1246         active_set = get_active_counter_set(tag_entry->tn.tag);
1247         MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d "
1248                  "dir=%d proto=%d bytes=%d)\n",
1249                  tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag),
1250                  active_set, direction, proto, bytes);
1251         data_counters_update(&tag_entry->counters, active_set, direction,
1252                              proto, bytes);
1253         if (tag_entry->parent_counters)
1254                 data_counters_update(tag_entry->parent_counters, active_set,
1255                                      direction, proto, bytes);
1256 }
1257
1258 /*
1259  * Create a new entry for tracking the specified {acct_tag,uid_tag} within
1260  * the interface.
1261  * iface_entry->tag_stat_list_lock should be held.
1262  */
1263 static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry,
1264                                            tag_t tag)
1265 {
1266         struct tag_stat *new_tag_stat_entry = NULL;
1267         IF_DEBUG("qtaguid: iface_stat: %s(): ife=%p tag=0x%llx"
1268                  " (uid=%u)\n", __func__,
1269                  iface_entry, tag, get_uid_from_tag(tag));
1270         new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC);
1271         if (!new_tag_stat_entry) {
1272                 pr_err("qtaguid: iface_stat: tag stat alloc failed\n");
1273                 goto done;
1274         }
1275         new_tag_stat_entry->tn.tag = tag;
1276         tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree);
1277 done:
1278         return new_tag_stat_entry;
1279 }
1280
1281 static void if_tag_stat_update(const char *ifname, uid_t uid,
1282                                const struct sock *sk, enum ifs_tx_rx direction,
1283                                int proto, int bytes)
1284 {
1285         struct tag_stat *tag_stat_entry;
1286         tag_t tag, acct_tag;
1287         tag_t uid_tag;
1288         struct data_counters *uid_tag_counters;
1289         struct sock_tag *sock_tag_entry;
1290         struct iface_stat *iface_entry;
1291         struct tag_stat *new_tag_stat = NULL;
1292         MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s "
1293                 "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n",
1294                  ifname, uid, sk, direction, proto, bytes);
1295
1296
1297         iface_entry = get_iface_entry(ifname);
1298         if (!iface_entry) {
1299                 pr_err_ratelimited("qtaguid: iface_stat: stat_update() "
1300                                    "%s not found\n", ifname);
1301                 return;
1302         }
1303         /* It is ok to process data when an iface_entry is inactive */
1304
1305         MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n",
1306                  ifname, iface_entry);
1307
1308         /*
1309          * Look for a tagged sock.
1310          * It will have an acct_uid.
1311          */
1312         sock_tag_entry = get_sock_stat(sk);
1313         if (sock_tag_entry) {
1314                 tag = sock_tag_entry->tag;
1315                 acct_tag = get_atag_from_tag(tag);
1316                 uid_tag = get_utag_from_tag(tag);
1317         } else {
1318                 acct_tag = make_atag_from_value(0);
1319                 tag = combine_atag_with_uid(acct_tag, uid);
1320                 uid_tag = make_tag_from_uid(uid);
1321         }
1322         MT_DEBUG("qtaguid: iface_stat: stat_update(): "
1323                  " looking for tag=0x%llx (uid=%u) in ife=%p\n",
1324                  tag, get_uid_from_tag(tag), iface_entry);
1325         /* Loop over tag list under this interface for {acct_tag,uid_tag} */
1326         spin_lock_bh(&iface_entry->tag_stat_list_lock);
1327
1328         tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
1329                                               tag);
1330         if (tag_stat_entry) {
1331                 /*
1332                  * Updating the {acct_tag, uid_tag} entry handles both stats:
1333                  * {0, uid_tag} will also get updated.
1334                  */
1335                 tag_stat_update(tag_stat_entry, direction, proto, bytes);
1336                 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1337                 return;
1338         }
1339
1340         /* Loop over tag list under this interface for {0,uid_tag} */
1341         tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
1342                                               uid_tag);
1343         if (!tag_stat_entry) {
1344                 /* Here: the base uid_tag did not exist */
1345                 /*
1346                  * No parent counters. So
1347                  *  - No {0, uid_tag} stats and no {acct_tag, uid_tag} stats.
1348                  */
1349                 new_tag_stat = create_if_tag_stat(iface_entry, uid_tag);
1350                 if (!new_tag_stat)
1351                         goto unlock;
1352                 uid_tag_counters = &new_tag_stat->counters;
1353         } else {
1354                 uid_tag_counters = &tag_stat_entry->counters;
1355         }
1356
1357         if (acct_tag) {
1358                 /* Create the child {acct_tag, uid_tag} and hook up parent. */
1359                 new_tag_stat = create_if_tag_stat(iface_entry, tag);
1360                 if (!new_tag_stat)
1361                         goto unlock;
1362                 new_tag_stat->parent_counters = uid_tag_counters;
1363         } else {
1364                 /*
1365                  * For new_tag_stat to be still NULL here would require:
1366                  *  {0, uid_tag} exists
1367                  *  and {acct_tag, uid_tag} doesn't exist
1368                  *  AND acct_tag == 0.
1369                  * Impossible. This reassures us that new_tag_stat
1370                  * below will always be assigned.
1371                  */
1372                 BUG_ON(!new_tag_stat);
1373         }
1374         tag_stat_update(new_tag_stat, direction, proto, bytes);
1375 unlock:
1376         spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1377 }
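The tag arithmetic above (combine_atag_with_uid(), get_utag_from_tag(), and friends) comes from xt_qtaguid_internal.h, which is not part of this excerpt; the presumed layout is a 64-bit tag with the accounting tag in the upper 32 bits and the uid in the lower 32 bits:

/*
 * Presumed tag layout (assumption; see xt_qtaguid_internal.h):
 *
 *       63            32 31             0
 *      +----------------+----------------+
 *      |    acct_tag    |      uid       |
 *      +----------------+----------------+
 *
 * so, roughly:
 *      combine_atag_with_uid(atag, uid)  ~  atag | uid
 *      get_uid_from_tag(tag)             ~  (uid_t)(tag & 0xffffffff)
 *      get_atag_from_tag(tag)            ~  tag & ~0xffffffffULL
 *      make_tag_from_uid(uid) / get_utag_from_tag(tag) keep only the uid half.
 */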
1378
1379 static int iface_netdev_event_handler(struct notifier_block *nb,
1380                                       unsigned long event, void *ptr) {
1381         struct net_device *dev = ptr;
1382
1383         if (unlikely(module_passive))
1384                 return NOTIFY_DONE;
1385
1386         IF_DEBUG("qtaguid: iface_stat: netdev_event(): "
1387                  "ev=0x%lx/%s netdev=%p->name=%s\n",
1388                  event, netdev_evt_str(event), dev, dev ? dev->name : "");
1389
1390         switch (event) {
1391         case NETDEV_UP:
1392                 iface_stat_create(dev, NULL);
1393                 atomic64_inc(&qtu_events.iface_events);
1394                 break;
1395         case NETDEV_DOWN:
1396         case NETDEV_UNREGISTER:
1397                 iface_stat_update(dev, event == NETDEV_DOWN);
1398                 atomic64_inc(&qtu_events.iface_events);
1399                 break;
1400         }
1401         return NOTIFY_DONE;
1402 }
1403
1404 static int iface_inet6addr_event_handler(struct notifier_block *nb,
1405                                          unsigned long event, void *ptr)
1406 {
1407         struct inet6_ifaddr *ifa = ptr;
1408         struct net_device *dev;
1409
1410         if (unlikely(module_passive))
1411                 return NOTIFY_DONE;
1412
1413         IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): "
1414                  "ev=0x%lx/%s ifa=%p\n",
1415                  event, netdev_evt_str(event), ifa);
1416
1417         switch (event) {
1418         case NETDEV_UP:
1419                 BUG_ON(!ifa || !ifa->idev);
1420                 dev = ifa->idev->dev;
1421                 iface_stat_create_ipv6(dev, ifa);
1422                 atomic64_inc(&qtu_events.iface_events);
1423                 break;
1424         case NETDEV_DOWN:
1425         case NETDEV_UNREGISTER:
1426                 BUG_ON(!ifa || !ifa->idev);
1427                 dev = ifa->idev->dev;
1428                 iface_stat_update(dev, event == NETDEV_DOWN);
1429                 atomic64_inc(&qtu_events.iface_events);
1430                 break;
1431         }
1432         return NOTIFY_DONE;
1433 }
1434
1435 static int iface_inetaddr_event_handler(struct notifier_block *nb,
1436                                         unsigned long event, void *ptr)
1437 {
1438         struct in_ifaddr *ifa = ptr;
1439         struct net_device *dev;
1440
1441         if (unlikely(module_passive))
1442                 return NOTIFY_DONE;
1443
1444         IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): "
1445                  "ev=0x%lx/%s ifa=%p\n",
1446                  event, netdev_evt_str(event), ifa);
1447
1448         switch (event) {
1449         case NETDEV_UP:
1450                 BUG_ON(!ifa || !ifa->ifa_dev);
1451                 dev = ifa->ifa_dev->dev;
1452                 iface_stat_create(dev, ifa);
1453                 atomic64_inc(&qtu_events.iface_events);
1454                 break;
1455         case NETDEV_DOWN:
1456         case NETDEV_UNREGISTER:
1457                 BUG_ON(!ifa || !ifa->ifa_dev);
1458                 dev = ifa->ifa_dev->dev;
1459                 iface_stat_update(dev, event == NETDEV_DOWN);
1460                 atomic64_inc(&qtu_events.iface_events);
1461                 break;
1462         }
1463         return NOTIFY_DONE;
1464 }
1465
1466 static struct notifier_block iface_netdev_notifier_blk = {
1467         .notifier_call = iface_netdev_event_handler,
1468 };
1469
1470 static struct notifier_block iface_inetaddr_notifier_blk = {
1471         .notifier_call = iface_inetaddr_event_handler,
1472 };
1473
1474 static struct notifier_block iface_inet6addr_notifier_blk = {
1475         .notifier_call = iface_inet6addr_event_handler,
1476 };
1477
1478 static const struct seq_operations iface_stat_fmt_proc_seq_ops = {
1479         .start  = iface_stat_fmt_proc_start,
1480         .next   = iface_stat_fmt_proc_next,
1481         .stop   = iface_stat_fmt_proc_stop,
1482         .show   = iface_stat_fmt_proc_show,
1483 };
1484
1485 static int proc_iface_stat_fmt_open(struct inode *inode, struct file *file)
1486 {
1487         struct proc_iface_stat_fmt_info *s;
1488
1489         s = __seq_open_private(file, &iface_stat_fmt_proc_seq_ops,
1490                         sizeof(struct proc_iface_stat_fmt_info));
1491         if (!s)
1492                 return -ENOMEM;
1493
1494         s->fmt = (uintptr_t)PDE_DATA(inode);
1495         return 0;
1496 }
1497
1498 static const struct file_operations proc_iface_stat_fmt_fops = {
1499         .open           = proc_iface_stat_fmt_open,
1500         .read           = seq_read,
1501         .llseek         = seq_lseek,
1502         .release        = seq_release_private,
1503 };
1504
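/*
 * Creates the iface_stat proc directory plus the "all" and "fmt" stat
 * files (both backed by proc_iface_stat_fmt_fops, distinguished by their
 * PDE data), and registers the netdev/inetaddr/inet6addr notifiers above.
 * On failure, whatever was already set up is torn down in reverse order.
 */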
1505 static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
1506 {
1507         int err;
1508
1509         iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir);
1510         if (!iface_stat_procdir) {
1511                 pr_err("qtaguid: iface_stat: init failed to create proc entry\n");
1512                 err = -ENOMEM;
1513                 goto err;
1514         }
1515
1516         iface_stat_all_procfile = proc_create_data(iface_stat_all_procfilename,
1517                                                    proc_iface_perms,
1518                                                    parent_procdir,
1519                                                    &proc_iface_stat_fmt_fops,
1520                                                    (void *)1 /* fmt1 */);
1521         if (!iface_stat_all_procfile) {
1522                 pr_err("qtaguid: iface_stat: init "
1523                        "failed to create iface_stat_all proc entry\n");
1524                 err = -ENOMEM;
1525                 goto err_zap_entry;
1526         }
1527
1528         iface_stat_fmt_procfile = proc_create_data(iface_stat_fmt_procfilename,
1529                                                    proc_iface_perms,
1530                                                    parent_procdir,
1531                                                    &proc_iface_stat_fmt_fops,
1532                                                    (void *)2 /* fmt2 */);
1533         if (!iface_stat_fmt_procfile) {
1534                 pr_err("qtaguid: iface_stat: init "
1535                        "failed to create iface_stat_fmt proc entry\n");
1536                 err = -ENOMEM;
1537                 goto err_zap_all_stats_entry;
1538         }
1539
1540
1541         err = register_netdevice_notifier(&iface_netdev_notifier_blk);
1542         if (err) {
1543                 pr_err("qtaguid: iface_stat: init "
1544                        "failed to register dev event handler\n");
1545                 goto err_zap_all_stats_entries;
1546         }
1547         err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1548         if (err) {
1549                 pr_err("qtaguid: iface_stat: init "
1550                        "failed to register ipv4 dev event handler\n");
1551                 goto err_unreg_nd;
1552         }
1553
1554         err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk);
1555         if (err) {
1556                 pr_err("qtaguid: iface_stat: init "
1557                        "failed to register ipv6 dev event handler\n");
1558                 goto err_unreg_ip4_addr;
1559         }
1560         return 0;
1561
1562 err_unreg_ip4_addr:
1563         unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1564 err_unreg_nd:
1565         unregister_netdevice_notifier(&iface_netdev_notifier_blk);
1566 err_zap_all_stats_entries:
1567         remove_proc_entry(iface_stat_fmt_procfilename, parent_procdir);
1568 err_zap_all_stats_entry:
1569         remove_proc_entry(iface_stat_all_procfilename, parent_procdir);
1570 err_zap_entry:
1571         remove_proc_entry(iface_stat_procdirname, parent_procdir);
1572 err:
1573         return err;
1574 }
1575
1576 static struct sock *qtaguid_find_sk(const struct sk_buff *skb,
1577                                     struct xt_action_param *par)
1578 {
1579         struct sock *sk;
1580         unsigned int hook_mask = (1 << par->hooknum);
1581
1582         MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb,
1583                  par->hooknum, par->family);
1584
1585         /*
1586          * Let's not abuse the xt_socket_get*_sk(), or else it will
1587          * return garbage SKs.
1588          */
1589         if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS))
1590                 return NULL;
1591
1592         switch (par->family) {
1593         case NFPROTO_IPV6:
1594                 sk = xt_socket_get6_sk(skb, par);
1595                 break;
1596         case NFPROTO_IPV4:
1597                 sk = xt_socket_get4_sk(skb, par);
1598                 break;
1599         default:
1600                 return NULL;
1601         }
1602
1603         if (sk) {
1604                 MT_DEBUG("qtaguid: %p->sk_proto=%u "
1605                          "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state);
1606                 /*
1607                  * When in TCP_TIME_WAIT the sk is not a "struct sock" but
1608                  * "struct inet_timewait_sock" which is missing fields.
1609                  */
1610                 if (sk->sk_state == TCP_TIME_WAIT) {
1611                         xt_socket_put_sk(sk);
1612                         sk = NULL;
1613                 }
1614         }
1615         return sk;
1616 }
1617
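/*
 * Resolves the device to charge (skb->dev when present, otherwise
 * par->in/par->out) and feeds if_tag_stat_update() with the uid, the
 * direction (IFS_RX on input, IFS_TX on output), the L4 protocol and
 * the packet length.
 */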
1618 static void account_for_uid(const struct sk_buff *skb,
1619                             const struct sock *alternate_sk, uid_t uid,
1620                             struct xt_action_param *par)
1621 {
1622         const struct net_device *el_dev;
1623
1624         if (!skb->dev) {
1625                 MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
1626                 el_dev = par->in ? : par->out;
1627         } else {
1628                 const struct net_device *other_dev;
1629                 el_dev = skb->dev;
1630                 other_dev = par->in ? : par->out;
1631                 if (el_dev != other_dev) {
1632                         MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
1633                                 "par->(in/out)=%p %s\n",
1634                                 par->hooknum, el_dev, el_dev->name, other_dev,
1635                                 other_dev->name);
1636                 }
1637         }
1638
1639         if (unlikely(!el_dev)) {
1640                 pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum);
1641         } else if (unlikely(!el_dev->name)) {
1642                 pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum);
1643         } else {
1644                 int proto = ipx_proto(skb, par);
1645                 MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
1646                          par->hooknum, el_dev->name, el_dev->type,
1647                          par->family, proto);
1648
1649                 if_tag_stat_update(el_dev->name, uid,
1650                                 skb->sk ? skb->sk : alternate_sk,
1651                                 par->in ? IFS_RX : IFS_TX,
1652                                 proto, skb->len);
1653         }
1654 }
1655
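/*
 * The match core. When the module is passive it bails out right away;
 * in the PRE/POST_ROUTING hooks it only updates interface stats; in the
 * other hooks it resolves the owning socket (skb->sk, or
 * qtaguid_find_sk() via the xt_socket helpers), falls back to charging
 * uid 0 when no socket or file can be found, and finally applies the
 * uid/gid range tests below.
 */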
1656 static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)
1657 {
1658         const struct xt_qtaguid_match_info *info = par->matchinfo;
1659         const struct file *filp;
1660         bool got_sock = false;
1661         struct sock *sk;
1662         uid_t sock_uid;
1663         bool res;
1664         bool set_sk_callback_lock = false;
1665
1666         if (unlikely(module_passive))
1667                 return (info->match ^ info->invert) == 0;
1668
1669         MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n",
1670                  par->hooknum, skb, par->in, par->out, par->family);
1671
1672         atomic64_inc(&qtu_events.match_calls);
1673         if (skb == NULL) {
1674                 res = (info->match ^ info->invert) == 0;
1675                 goto ret_res;
1676         }
1677
1678         switch (par->hooknum) {
1679         case NF_INET_PRE_ROUTING:
1680         case NF_INET_POST_ROUTING:
1681                 atomic64_inc(&qtu_events.match_calls_prepost);
1682                 iface_stat_update_from_skb(skb, par);
1683                 /*
1684                  * We are done in pre/post. The skb will get processed
1685                  * further later on.
1686                  */
1687                 res = (info->match ^ info->invert);
1688                 goto ret_res;
1690         /* default: Fall through and do UID related work */
1691         }
1692
1693         sk = skb->sk;
1694         /*
1695          * When in TCP_TIME_WAIT the sk is not a "struct sock" but
1696          * "struct inet_timewait_sock" which is missing fields.
1697          * So we ignore it.
1698          */
1699         if (sk && sk->sk_state == TCP_TIME_WAIT)
1700                 sk = NULL;
1701         if (sk == NULL) {
1702                 /*
1703                  * A missing sk->sk_socket happens when packets are in-flight
1704                  * and the matching socket is already closed and gone.
1705                  */
1706                 sk = qtaguid_find_sk(skb, par);
1707                 /*
1708                  * If we got the socket from qtaguid_find_sk(), we will need to
1709                  * put it back, as the xt_socket_get*_sk() helpers took a reference.
1710                  */
1711                 got_sock = sk;
1712                 if (sk)
1713                         atomic64_inc(&qtu_events.match_found_sk_in_ct);
1714                 else
1715                         atomic64_inc(&qtu_events.match_found_no_sk_in_ct);
1716         } else {
1717                 atomic64_inc(&qtu_events.match_found_sk);
1718         }
1719         MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d fam=%d proto=%d\n",
1720                  par->hooknum, sk, got_sock, par->family, ipx_proto(skb, par));
1721         if (sk != NULL) {
1722                 set_sk_callback_lock = true;
1723                 read_lock_bh(&sk->sk_callback_lock);
1724                 MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n",
1725                         par->hooknum, sk, sk->sk_socket,
1726                         sk->sk_socket ? sk->sk_socket->file : (void *)-1LL);
1727                 filp = sk->sk_socket ? sk->sk_socket->file : NULL;
1728                 MT_DEBUG("qtaguid[%d]: filp...uid=%u\n",
1729                         par->hooknum, filp ?
1730                         __kuid_val(filp->f_cred->fsuid) : -1);
1731         }
1732
1733         if (sk == NULL || sk->sk_socket == NULL) {
1734                 /*
1735                  * Here, the qtaguid_find_sk() using connection tracking
1736                  * couldn't find the owner, so for now we just count them
1737                  * against the system.
1738                  */
1739                 /*
1740                  * TODO: unhack how to force just accounting.
1741                  * For now we only do iface stats when the uid-owner is not
1742                  * requested.
1743                  */
1744                 if (!(info->match & XT_QTAGUID_UID))
1745                         account_for_uid(skb, sk, 0, par);
1746                 MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n",
1747                         par->hooknum,
1748                         sk ? sk->sk_socket : NULL);
1749                 res = (info->match ^ info->invert) == 0;
1750                 atomic64_inc(&qtu_events.match_no_sk);
1751                 goto put_sock_ret_res;
1752         } else if (info->match & info->invert & XT_QTAGUID_SOCKET) {
1753                 res = false;
1754                 goto put_sock_ret_res;
1755         }
1756         filp = sk->sk_socket->file;
1757         if (filp == NULL) {
1758                 MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum);
1759                 account_for_uid(skb, sk, 0, par);
1760                 res = ((info->match ^ info->invert) &
1761                         (XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0;
1762                 atomic64_inc(&qtu_events.match_no_sk_file);
1763                 goto put_sock_ret_res;
1764         }
1765         sock_uid = __kuid_val(filp->f_cred->fsuid);
1766         /*
1767          * TODO: unhack how to force just accounting.
1768          * For now we only do iface stats when the uid-owner is not requested
1769          */
1770         if (!(info->match & XT_QTAGUID_UID))
1771                 account_for_uid(skb, sk, sock_uid, par);
1772
1773         /*
1774          * The following two tests fail the match when:
1775          *    id not in range AND no inverted condition requested
1776          * or id     in range AND    inverted condition requested
1777          * Thus (!a && b) || (a && !b) == a ^ b
1778          */
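        /*
         * E.g. with info->uid_min == info->uid_max == 1000 and no
         * inversion requested: fsuid 1000 gives (1 ^ 1) == 0, so the
         * test below falls through and the packet keeps matching, while
         * fsuid 0 gives (0 ^ 1) == 1 and the match fails.
         */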
1779         if (info->match & XT_QTAGUID_UID)
1780                 if ((__kuid_val(filp->f_cred->fsuid) >= info->uid_min &&
1781                      __kuid_val(filp->f_cred->fsuid) <= info->uid_max) ^
1782                     !(info->invert & XT_QTAGUID_UID)) {
1783                         MT_DEBUG("qtaguid[%d]: leaving uid not matching\n",
1784                                  par->hooknum);
1785                         res = false;
1786                         goto put_sock_ret_res;
1787                 }
1788         if (info->match & XT_QTAGUID_GID)
1789                 if ((__kgid_val(filp->f_cred->fsgid) >= info->gid_min &&
1790                      __kgid_val(filp->f_cred->fsgid) <= info->gid_max) ^
1791                     !(info->invert & XT_QTAGUID_GID)) {
1792                         MT_DEBUG("qtaguid[%d]: leaving gid not matching\n",
1793                                 par->hooknum);
1794                         res = false;
1795                         goto put_sock_ret_res;
1796                 }
1797
1798         MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum);
1799         res = true;
1800
1801 put_sock_ret_res:
1802         if (got_sock)
1803                 xt_socket_put_sk(sk);
1804         if (set_sk_callback_lock)
1805                 read_unlock_bh(&sk->sk_callback_lock);
1806 ret_res:
1807         MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res);
1808         return res;
1809 }
1810
1811 #ifdef DDEBUG
1812 /* This function is not in xt_qtaguid_print.c because of locks visibility */
1813 static void prdebug_full_state(int indent_level, const char *fmt, ...)
1814 {
1815         va_list args;
1816         char *fmt_buff;
1817         char *buff;
1818
1819         if (likely(!(qtaguid_debug_mask & DDEBUG_MASK)))
1820                 return;
1821
1822         fmt_buff = kasprintf(GFP_ATOMIC,
1823                              "qtaguid: %s(): %s {\n", __func__, fmt);
1824         BUG_ON(!fmt_buff);
1825         va_start(args, fmt);
1826         buff = kvasprintf(GFP_ATOMIC,
1827                           fmt_buff, args);
1828         BUG_ON(!buff);
1829         pr_debug("%s", buff);
1830         kfree(fmt_buff);
1831         kfree(buff);
1832         va_end(args);
1833
1834         spin_lock_bh(&sock_tag_list_lock);
1835         prdebug_sock_tag_tree(indent_level, &sock_tag_tree);
1836         spin_unlock_bh(&sock_tag_list_lock);
1837
1838         spin_lock_bh(&sock_tag_list_lock);
1839         spin_lock_bh(&uid_tag_data_tree_lock);
1840         prdebug_uid_tag_data_tree(indent_level, &uid_tag_data_tree);
1841         prdebug_proc_qtu_data_tree(indent_level, &proc_qtu_data_tree);
1842         spin_unlock_bh(&uid_tag_data_tree_lock);
1843         spin_unlock_bh(&sock_tag_list_lock);
1844
1845         spin_lock_bh(&iface_stat_list_lock);
1846         prdebug_iface_stat_list(indent_level, &iface_stat_list);
1847         spin_unlock_bh(&iface_stat_list_lock);
1848
1849         pr_debug("qtaguid: %s(): }\n", __func__);
1850 }
1851 #else
1852 static void prdebug_full_state(int indent_level, const char *fmt, ...) {}
1853 #endif
1854
1855 struct proc_ctrl_print_info {
1856         struct sock *sk; /* socket found by reading to sk_pos */
1857         loff_t sk_pos;
1858 };
1859
1860 static void *qtaguid_ctrl_proc_next(struct seq_file *m, void *v, loff_t *pos)
1861 {
1862         struct proc_ctrl_print_info *pcpi = m->private;
1863         struct sock_tag *sock_tag_entry = v;
1864         struct rb_node *node;
1865
1866         (*pos)++;
1867
1868         if (!v || v == SEQ_START_TOKEN)
1869                 return NULL;
1870
1871         node = rb_next(&sock_tag_entry->sock_node);
1872         if (!node) {
1873                 pcpi->sk = NULL;
1874                 sock_tag_entry = SEQ_START_TOKEN;
1875         } else {
1876                 sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
1877                 pcpi->sk = sock_tag_entry->sk;
1878         }
1879         pcpi->sk_pos = *pos;
1880         return sock_tag_entry;
1881 }
1882
1883 static void *qtaguid_ctrl_proc_start(struct seq_file *m, loff_t *pos)
1884 {
1885         struct proc_ctrl_print_info *pcpi = m->private;
1886         struct sock_tag *sock_tag_entry;
1887         struct rb_node *node;
1888
1889         spin_lock_bh(&sock_tag_list_lock);
1890
1891         if (unlikely(module_passive))
1892                 return NULL;
1893
1894         if (*pos == 0) {
1895                 pcpi->sk_pos = 0;
1896                 node = rb_first(&sock_tag_tree);
1897                 if (!node) {
1898                         pcpi->sk = NULL;
1899                         return SEQ_START_TOKEN;
1900                 }
1901                 sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
1902                 pcpi->sk = sock_tag_entry->sk;
1903         } else {
1904                 sock_tag_entry = (pcpi->sk ? get_sock_stat_nl(pcpi->sk) :
1905                                                 NULL) ?: SEQ_START_TOKEN;
1906                 if (*pos != pcpi->sk_pos) {
1907                         /* seq_read skipped a next call */
1908                         *pos = pcpi->sk_pos;
1909                         return qtaguid_ctrl_proc_next(m, sock_tag_entry, pos);
1910                 }
1911         }
1912         return sock_tag_entry;
1913 }
1914
1915 static void qtaguid_ctrl_proc_stop(struct seq_file *m, void *v)
1916 {
1917         spin_unlock_bh(&sock_tag_list_lock);
1918 }
1919
1920 /*
1921  * Procfs reader to get all active socket tags using style "1)" as described in
1922  * fs/proc/generic.c
1923  */
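/*
 * Each tagged socket comes out as
 *   "sock=<ptr> tag=0x<tag> (uid=<uid>) pid=<pid> f_count=<count>"
 * while the SEQ_START_TOKEN entry prints the aggregate "events:" counters.
 */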
1924 static int qtaguid_ctrl_proc_show(struct seq_file *m, void *v)
1925 {
1926         struct sock_tag *sock_tag_entry = v;
1927         uid_t uid;
1928         long f_count;
1929
1930         CT_DEBUG("qtaguid: proc ctrl pid=%u tgid=%u uid=%u\n",
1931                  current->pid, current->tgid, __kuid_val(current_fsuid()));
1932
1933         if (sock_tag_entry != SEQ_START_TOKEN) {
1934                 uid = get_uid_from_tag(sock_tag_entry->tag);
1935                 CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u) "
1936                          "pid=%u\n",
1937                          sock_tag_entry->sk,
1938                          sock_tag_entry->tag,
1939                          uid,
1940                          sock_tag_entry->pid
1941                         );
1942                 f_count = atomic_long_read(
1943                         &sock_tag_entry->socket->file->f_count);
1944                 seq_printf(m, "sock=%p tag=0x%llx (uid=%u) pid=%u "
1945                            "f_count=%lu\n",
1946                            sock_tag_entry->sk,
1947                            sock_tag_entry->tag, uid,
1948                            sock_tag_entry->pid, f_count);
1949         } else {
1950                 seq_printf(m, "events: sockets_tagged=%llu "
1951                            "sockets_untagged=%llu "
1952                            "counter_set_changes=%llu "
1953                            "delete_cmds=%llu "
1954                            "iface_events=%llu "
1955                            "match_calls=%llu "
1956                            "match_calls_prepost=%llu "
1957                            "match_found_sk=%llu "
1958                            "match_found_sk_in_ct=%llu "
1959                            "match_found_no_sk_in_ct=%llu "
1960                            "match_no_sk=%llu "
1961                            "match_no_sk_file=%llu\n",
1962                            (u64)atomic64_read(&qtu_events.sockets_tagged),
1963                            (u64)atomic64_read(&qtu_events.sockets_untagged),
1964                            (u64)atomic64_read(&qtu_events.counter_set_changes),
1965                            (u64)atomic64_read(&qtu_events.delete_cmds),
1966                            (u64)atomic64_read(&qtu_events.iface_events),
1967                            (u64)atomic64_read(&qtu_events.match_calls),
1968                            (u64)atomic64_read(&qtu_events.match_calls_prepost),
1969                            (u64)atomic64_read(&qtu_events.match_found_sk),
1970                            (u64)atomic64_read(&qtu_events.match_found_sk_in_ct),
1971                            (u64)atomic64_read(&qtu_events.match_found_no_sk_in_ct),
1972                            (u64)atomic64_read(&qtu_events.match_no_sk),
1973                            (u64)atomic64_read(&qtu_events.match_no_sk_file));
1974
1975                 /* Count the following as part of the last item_index */
1976                 prdebug_full_state(0, "proc ctrl");
1977         }
1978
1979         return 0;
1980 }
1981
1982 /*
1983  * Delete socket tags, and stat tags associated with a given
1984  * accounting tag and uid.
1985  */
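/*
 * Command format (see the sscanf() below): "d <acct_tag> [<uid>]".
 * uid defaults to the caller's fsuid; deleting on behalf of another uid
 * requires can_impersonate_uid(). acct_tag 0 erases everything owned by
 * the uid.
 */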
1986 static int ctrl_cmd_delete(const char *input)
1987 {
1988         char cmd;
1989         uid_t uid;
1990         uid_t entry_uid;
1991         tag_t acct_tag;
1992         tag_t tag;
1993         int res, argc;
1994         struct iface_stat *iface_entry;
1995         struct rb_node *node;
1996         struct sock_tag *st_entry;
1997         struct rb_root st_to_free_tree = RB_ROOT;
1998         struct tag_stat *ts_entry;
1999         struct tag_counter_set *tcs_entry;
2000         struct tag_ref *tr_entry;
2001         struct uid_tag_data *utd_entry;
2002
2003         argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid);
2004         CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c "
2005                  "user_tag=0x%llx uid=%u\n", input, argc, cmd,
2006                  acct_tag, uid);
2007         if (argc < 2) {
2008                 res = -EINVAL;
2009                 goto err;
2010         }
2011         if (!valid_atag(acct_tag)) {
2012                 pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input);
2013                 res = -EINVAL;
2014                 goto err;
2015         }
2016         if (argc < 3) {
2017                 uid = __kuid_val(current_fsuid());
2018         } else if (!can_impersonate_uid(uid)) {
2019                 pr_info("qtaguid: ctrl_delete(%s): "
2020                         "insufficient priv from pid=%u tgid=%u uid=%u\n",
2021                         input, current->pid, current->tgid,
2022                         __kuid_val(current_fsuid()));
2023                 res = -EPERM;
2024                 goto err;
2025         }
2026
2027         tag = combine_atag_with_uid(acct_tag, uid);
2028         CT_DEBUG("qtaguid: ctrl_delete(%s): "
2029                  "looking for tag=0x%llx (uid=%u)\n",
2030                  input, tag, uid);
2031
2032         /* Delete socket tags */
2033         spin_lock_bh(&sock_tag_list_lock);
2034         node = rb_first(&sock_tag_tree);
2035         while (node) {
2036                 st_entry = rb_entry(node, struct sock_tag, sock_node);
2037                 entry_uid = get_uid_from_tag(st_entry->tag);
2038                 node = rb_next(node);
2039                 if (entry_uid != uid)
2040                         continue;
2041
2042                 CT_DEBUG("qtaguid: ctrl_delete(%s): st tag=0x%llx (uid=%u)\n",
2043                          input, st_entry->tag, entry_uid);
2044
2045                 if (!acct_tag || st_entry->tag == tag) {
2046                         rb_erase(&st_entry->sock_node, &sock_tag_tree);
2047                         /* Can't sockfd_put() within spinlock, do it later. */
2048                         sock_tag_tree_insert(st_entry, &st_to_free_tree);
2049                         tr_entry = lookup_tag_ref(st_entry->tag, NULL);
2050                         BUG_ON(tr_entry->num_sock_tags <= 0);
2051                         tr_entry->num_sock_tags--;
2052                         /*
2053                          * TODO: remove if, and start failing.
2054                          * This is a hack to work around the fact that in some
2055                          * places we have "if (IS_ERR_OR_NULL(pqd_entry))"
2056                          * and are trying to work around apps
2057                          * that didn't open the /dev/xt_qtaguid.
2058                          */
2059                         if (st_entry->list.next && st_entry->list.prev)
2060                                 list_del(&st_entry->list);
2061                 }
2062         }
2063         spin_unlock_bh(&sock_tag_list_lock);
2064
2065         sock_tag_tree_erase(&st_to_free_tree);
2066
2067         /* Delete tag counter-sets */
2068         spin_lock_bh(&tag_counter_set_list_lock);
2069         /* Counter sets are only on the uid tag, not full tag */
2070         tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
2071         if (tcs_entry) {
2072                 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2073                          "erase tcs: tag=0x%llx (uid=%u) set=%d\n",
2074                          input,
2075                          tcs_entry->tn.tag,
2076                          get_uid_from_tag(tcs_entry->tn.tag),
2077                          tcs_entry->active_set);
2078                 rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree);
2079                 kfree(tcs_entry);
2080         }
2081         spin_unlock_bh(&tag_counter_set_list_lock);
2082
2083         /*
2084          * If acct_tag is 0, then all entries belonging to uid are
2085          * erased.
2086          */
2087         spin_lock_bh(&iface_stat_list_lock);
2088         list_for_each_entry(iface_entry, &iface_stat_list, list) {
2089                 spin_lock_bh(&iface_entry->tag_stat_list_lock);
2090                 node = rb_first(&iface_entry->tag_stat_tree);
2091                 while (node) {
2092                         ts_entry = rb_entry(node, struct tag_stat, tn.node);
2093                         entry_uid = get_uid_from_tag(ts_entry->tn.tag);
2094                         node = rb_next(node);
2095
2096                         CT_DEBUG("qtaguid: ctrl_delete(%s): "
2097                                  "ts tag=0x%llx (uid=%u)\n",
2098                                  input, ts_entry->tn.tag, entry_uid);
2099
2100                         if (entry_uid != uid)
2101                                 continue;
2102                         if (!acct_tag || ts_entry->tn.tag == tag) {
2103                                 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2104                                          "erase ts: %s 0x%llx %u\n",
2105                                          input, iface_entry->ifname,
2106                                          get_atag_from_tag(ts_entry->tn.tag),
2107                                          entry_uid);
2108                                 rb_erase(&ts_entry->tn.node,
2109                                          &iface_entry->tag_stat_tree);
2110                                 kfree(ts_entry);
2111                         }
2112                 }
2113                 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
2114         }
2115         spin_unlock_bh(&iface_stat_list_lock);
2116
2117         /* Cleanup the uid_tag_data */
2118         spin_lock_bh(&uid_tag_data_tree_lock);
2119         node = rb_first(&uid_tag_data_tree);
2120         while (node) {
2121                 utd_entry = rb_entry(node, struct uid_tag_data, node);
2122                 entry_uid = utd_entry->uid;
2123                 node = rb_next(node);
2124
2125                 CT_DEBUG("qtaguid: ctrl_delete(%s): "
2126                          "utd uid=%u\n",
2127                          input, entry_uid);
2128
2129                 if (entry_uid != uid)
2130                         continue;
2131                 /*
2132                  * Go over the tag_refs, and those that don't have
2133                  * sock_tags using them are freed.
2134                  */
2135                 put_tag_ref_tree(tag, utd_entry);
2136                 put_utd_entry(utd_entry);
2137         }
2138         spin_unlock_bh(&uid_tag_data_tree_lock);
2139
2140         atomic64_inc(&qtu_events.delete_cmds);
2141         res = 0;
2142
2143 err:
2144         return res;
2145 }
2146
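/* Command format: "s <counter_set> <uid>"; all three fields are required. */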
2147 static int ctrl_cmd_counter_set(const char *input)
2148 {
2149         char cmd;
2150         uid_t uid = 0;
2151         tag_t tag;
2152         int res, argc;
2153         struct tag_counter_set *tcs;
2154         int counter_set;
2155
2156         argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid);
2157         CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c "
2158                  "set=%d uid=%u\n", input, argc, cmd,
2159                  counter_set, uid);
2160         if (argc != 3) {
2161                 res = -EINVAL;
2162                 goto err;
2163         }
2164         if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) {
2165                 pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n",
2166                         input);
2167                 res = -EINVAL;
2168                 goto err;
2169         }
2170         if (!can_manipulate_uids()) {
2171                 pr_info("qtaguid: ctrl_counterset(%s): "
2172                         "insufficient priv from pid=%u tgid=%u uid=%u\n",
2173                         input, current->pid, current->tgid,
2174                         __kuid_val(current_fsuid()));
2175                 res = -EPERM;
2176                 goto err;
2177         }
2178
2179         tag = make_tag_from_uid(uid);
2180         spin_lock_bh(&tag_counter_set_list_lock);
2181         tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
2182         if (!tcs) {
2183                 tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC);
2184                 if (!tcs) {
2185                         spin_unlock_bh(&tag_counter_set_list_lock);
2186                         pr_err("qtaguid: ctrl_counterset(%s): "
2187                                "failed to alloc counter set\n",
2188                                input);
2189                         res = -ENOMEM;
2190                         goto err;
2191                 }
2192                 tcs->tn.tag = tag;
2193                 tag_counter_set_tree_insert(tcs, &tag_counter_set_tree);
2194                 CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx "
2195                          "(uid=%u) set=%d\n",
2196                          input, tag, get_uid_from_tag(tag), counter_set);
2197         }
2198         tcs->active_set = counter_set;
2199         spin_unlock_bh(&tag_counter_set_list_lock);
2200         atomic64_inc(&qtu_events.counter_set_changes);
2201         res = 0;
2202
2203 err:
2204         return res;
2205 }
2206
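/*
 * Command format: "t <sock_fd> [<acct_tag> [<uid>]]". acct_tag defaults
 * to 0 and uid to the caller's fsuid; tagging on behalf of another uid
 * requires can_impersonate_uid(). The file reference taken here by
 * sockfd_lookup() is kept until the socket is untagged.
 */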
2207 static int ctrl_cmd_tag(const char *input)
2208 {
2209         char cmd;
2210         int sock_fd = 0;
2211         uid_t uid = 0;
2212         tag_t acct_tag = make_atag_from_value(0);
2213         tag_t full_tag;
2214         struct socket *el_socket;
2215         int res, argc;
2216         struct sock_tag *sock_tag_entry;
2217         struct tag_ref *tag_ref_entry;
2218         struct uid_tag_data *uid_tag_data_entry;
2219         struct proc_qtu_data *pqd_entry;
2220
2221         /* Unassigned args will get defaulted later. */
2222         argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid);
2223         CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d "
2224                  "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd,
2225                  acct_tag, uid);
2226         if (argc < 2) {
2227                 res = -EINVAL;
2228                 goto err;
2229         }
2230         el_socket = sockfd_lookup(sock_fd, &res);  /* This grabs a file ref */
2231         if (!el_socket) {
2232                 pr_info("qtaguid: ctrl_tag(%s): failed to lookup"
2233                         " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
2234                         input, sock_fd, res, current->pid, current->tgid,
2235                         __kuid_val(current_fsuid()));
2236                 goto err;
2237         }
2238         CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n",
2239                  input, atomic_long_read(&el_socket->file->f_count),
2240                  el_socket->sk);
2241         if (argc < 3) {
2242                 acct_tag = make_atag_from_value(0);
2243         } else if (!valid_atag(acct_tag)) {
2244                 pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input);
2245                 res = -EINVAL;
2246                 goto err_put;
2247         }
2248         CT_DEBUG("qtaguid: ctrl_tag(%s): "
2249                  "pid=%u tgid=%u uid=%u euid=%u fsuid=%u "
2250                  "ctrl.gid=%u in_group()=%d in_egroup()=%d\n",
2251                  input, current->pid, current->tgid,
2252                  __kuid_val(current_uid()),
2253                  __kuid_val(current_euid()),
2254                  __kuid_val(current_fsuid()),
2255                  __kgid_val(xt_qtaguid_ctrl_file->gid),
2256                  in_group_p(xt_qtaguid_ctrl_file->gid),
2257                  in_egroup_p(xt_qtaguid_ctrl_file->gid));
2258         if (argc < 4) {
2259                 uid = __kuid_val(current_fsuid());
2260         } else if (!can_impersonate_uid(uid)) {
2261                 pr_info("qtaguid: ctrl_tag(%s): "
2262                         "insufficient priv from pid=%u tgid=%u uid=%u\n",
2263                         input, current->pid, current->tgid,
2264                         __kuid_val(current_fsuid()));
2265                 res = -EPERM;
2266                 goto err_put;
2267         }
2268         full_tag = combine_atag_with_uid(acct_tag, uid);
2269
2270         spin_lock_bh(&sock_tag_list_lock);
2271         sock_tag_entry = get_sock_stat_nl(el_socket->sk);
2272         tag_ref_entry = get_tag_ref(full_tag, &uid_tag_data_entry);
2273         if (IS_ERR(tag_ref_entry)) {
2274                 res = PTR_ERR(tag_ref_entry);
2275                 spin_unlock_bh(&sock_tag_list_lock);
2276                 goto err_put;
2277         }
2278         tag_ref_entry->num_sock_tags++;
2279         if (sock_tag_entry) {
2280                 struct tag_ref *prev_tag_ref_entry;
2281
2282                 CT_DEBUG("qtaguid: ctrl_tag(%s): retag for sk=%p "
2283                          "st@%p ...->f_count=%ld\n",
2284                          input, el_socket->sk, sock_tag_entry,
2285                          atomic_long_read(&el_socket->file->f_count));
2286                 /*
2287                  * This is a re-tagging, so release the sock_fd that was
2288                  * locked at the time of the 1st tagging.
2289                  * There is still the ref from this call's sockfd_lookup() so
2290                  * it can be done within the spinlock.
2291                  */
2292                 sockfd_put(sock_tag_entry->socket);
2293                 prev_tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag,
2294                                                     &uid_tag_data_entry);
2295                 BUG_ON(IS_ERR_OR_NULL(prev_tag_ref_entry));
2296                 BUG_ON(prev_tag_ref_entry->num_sock_tags <= 0);
2297                 prev_tag_ref_entry->num_sock_tags--;
2298                 sock_tag_entry->tag = full_tag;
2299         } else {
2300                 CT_DEBUG("qtaguid: ctrl_tag(%s): newtag for sk=%p\n",
2301                          input, el_socket->sk);
2302                 sock_tag_entry = kzalloc(sizeof(*sock_tag_entry),
2303                                          GFP_ATOMIC);
2304                 if (!sock_tag_entry) {
2305                         pr_err("qtaguid: ctrl_tag(%s): "
2306                                "socket tag alloc failed\n",
2307                                input);
2308                         spin_unlock_bh(&sock_tag_list_lock);
2309                         res = -ENOMEM;
2310                         goto err_tag_unref_put;
2311                 }
2312                 sock_tag_entry->sk = el_socket->sk;
2313                 sock_tag_entry->socket = el_socket;
2314                 sock_tag_entry->pid = current->tgid;
2315                 sock_tag_entry->tag = combine_atag_with_uid(acct_tag,
2316                                                             uid);
2317                 spin_lock_bh(&uid_tag_data_tree_lock);
2318                 pqd_entry = proc_qtu_data_tree_search(
2319                         &proc_qtu_data_tree, current->tgid);
2320                 /*
2321                  * TODO: remove if, and start failing.
2322                  * At first, we want to catch user-space code that is not
2323                  * opening the /dev/xt_qtaguid.
2324                  */
2325                 if (IS_ERR_OR_NULL(pqd_entry))
2326                         pr_warn_once(
2327                                 "qtaguid: %s(): "
2328                                 "User space forgot to open /dev/xt_qtaguid? "
2329                                 "pid=%u tgid=%u uid=%u\n", __func__,
2330                                 current->pid, current->tgid,
2331                                 __kuid_val(current_fsuid()));
2332                 else
2333                         list_add(&sock_tag_entry->list,
2334                                  &pqd_entry->sock_tag_list);
2335                 spin_unlock_bh(&uid_tag_data_tree_lock);
2336
2337                 sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree);
2338                 atomic64_inc(&qtu_events.sockets_tagged);
2339         }
2340         spin_unlock_bh(&sock_tag_list_lock);
2341         /* We keep the ref to the socket (file) until it is untagged */
2342         CT_DEBUG("qtaguid: ctrl_tag(%s): done st@%p ...->f_count=%ld\n",
2343                  input, sock_tag_entry,
2344                  atomic_long_read(&el_socket->file->f_count));
2345         return 0;
2346
2347 err_tag_unref_put:
2348         BUG_ON(tag_ref_entry->num_sock_tags <= 0);
2349         tag_ref_entry->num_sock_tags--;
2350         free_tag_ref_from_utd_entry(tag_ref_entry, uid_tag_data_entry);
2351 err_put:
2352         CT_DEBUG("qtaguid: ctrl_tag(%s): done. ...->f_count=%ld\n",
2353                  input, atomic_long_read(&el_socket->file->f_count) - 1);
2354         /* Release the sock_fd that was grabbed by sockfd_lookup(). */
2355         sockfd_put(el_socket);
2356         return res;
2357
2358 err:
2359         CT_DEBUG("qtaguid: ctrl_tag(%s): done.\n", input);
2360         return res;
2361 }
2362
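/* Command format: "u <sock_fd>"; drops the reference taken at tag time. */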
2363 static int ctrl_cmd_untag(const char *input)
2364 {
2365         char cmd;
2366         int sock_fd = 0;
2367         struct socket *el_socket;
2368         int res, argc;
2369         struct sock_tag *sock_tag_entry;
2370         struct tag_ref *tag_ref_entry;
2371         struct uid_tag_data *utd_entry;
2372         struct proc_qtu_data *pqd_entry;
2373
2374         argc = sscanf(input, "%c %d", &cmd, &sock_fd);
2375         CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n",
2376                  input, argc, cmd, sock_fd);
2377         if (argc < 2) {
2378                 res = -EINVAL;
2379                 goto err;
2380         }
2381         el_socket = sockfd_lookup(sock_fd, &res);  /* This grabs a file ref */
2382         if (!el_socket) {
2383                 pr_info("qtaguid: ctrl_untag(%s): failed to lookup"
2384                         " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
2385                         input, sock_fd, res, current->pid, current->tgid,
2386                         __kuid_val(current_fsuid()));
2387                 goto err;
2388         }
2389         CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n",
2390                  input, atomic_long_read(&el_socket->file->f_count),
2391                  el_socket->sk);
2392         spin_lock_bh(&sock_tag_list_lock);
2393         sock_tag_entry = get_sock_stat_nl(el_socket->sk);
2394         if (!sock_tag_entry) {
2395                 spin_unlock_bh(&sock_tag_list_lock);
2396                 res = -EINVAL;
2397                 goto err_put;
2398         }
2399         /*
2400          * The socket already belongs to the current process
2401          * so it can do whatever it wants to it.
2402          */
2403         rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree);
2404
2405         tag_ref_entry = lookup_tag_ref(sock_tag_entry->tag, &utd_entry);
2406         BUG_ON(!tag_ref_entry);
2407         BUG_ON(tag_ref_entry->num_sock_tags <= 0);
2408         spin_lock_bh(&uid_tag_data_tree_lock);
2409         pqd_entry = proc_qtu_data_tree_search(
2410                 &proc_qtu_data_tree, current->tgid);
2411         /*
2412          * TODO: remove if, and start failing.
2413          * At first, we want to catch user-space code that is not
2414          * opening the /dev/xt_qtaguid.
2415          */
2416         if (IS_ERR_OR_NULL(pqd_entry))
2417                 pr_warn_once("qtaguid: %s(): "
2418                              "User space forgot to open /dev/xt_qtaguid? "
2419                              "pid=%u tgid=%u uid=%u\n", __func__,
2420                              current->pid, current->tgid,
2421                              __kuid_val(current_fsuid()));
2422         else
2423                 list_del(&sock_tag_entry->list);
2424         spin_unlock_bh(&uid_tag_data_tree_lock);
2425         /*
2426          * We don't free tag_ref from the utd_entry here,
2427          * only during a cmd_delete().
2428          */
2429         tag_ref_entry->num_sock_tags--;
2430         spin_unlock_bh(&sock_tag_list_lock);
2431         /*
2432          * Release the sock_fd that was grabbed at tag time,
2433          * and once more for the sockfd_lookup() here.
2434          */
2435         sockfd_put(sock_tag_entry->socket);
2436         CT_DEBUG("qtaguid: ctrl_untag(%s): done. st@%p ...->f_count=%ld\n",
2437                  input, sock_tag_entry,
2438                  atomic_long_read(&el_socket->file->f_count) - 1);
2439         sockfd_put(el_socket);
2440
2441         kfree(sock_tag_entry);
2442         atomic64_inc(&qtu_events.sockets_untagged);
2443
2444         return 0;
2445
2446 err_put:
2447         CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%ld\n",
2448                  input, atomic_long_read(&el_socket->file->f_count) - 1);
2449         /* Release the sock_fd that was grabbed by sockfd_lookup(). */
2450         sockfd_put(el_socket);
2451         return res;
2452
2453 err:
2454         CT_DEBUG("qtaguid: ctrl_untag(%s): done.\n", input);
2455         return res;
2456 }
2457
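/*
 * Commands are single lines written to the ctrl proc entry (typically
 * /proc/net/xt_qtaguid/ctrl); socket fds are resolved in the calling
 * process. Tags are parsed as decimal %llu values, so e.g. writing
 * "t 7 8589934592 1000" tags the caller's socket fd 7 with acct_tag
 * value 2 (2 << 32 == 8589934592) on behalf of uid 1000.
 */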
2458 static ssize_t qtaguid_ctrl_parse(const char *input, size_t count)
2459 {
2460         char cmd;
2461         ssize_t res;
2462
2463         CT_DEBUG("qtaguid: ctrl(%s): pid=%u tgid=%u uid=%u\n",
2464                  input, current->pid, current->tgid,
2465                  __kuid_val(current_fsuid()));
2466
2467         cmd = input[0];
2468         /* Collect params for commands */
2469         switch (cmd) {
2470         case 'd':
2471                 res = ctrl_cmd_delete(input);
2472                 break;
2473
2474         case 's':
2475                 res = ctrl_cmd_counter_set(input);
2476                 break;
2477
2478         case 't':
2479                 res = ctrl_cmd_tag(input);
2480                 break;
2481
2482         case 'u':
2483                 res = ctrl_cmd_untag(input);
2484                 break;
2485
2486         default:
2487                 res = -EINVAL;
2488                 goto err;
2489         }
2490         if (!res)
2491                 res = count;
2492 err:
2493         CT_DEBUG("qtaguid: ctrl(%s): res=%zd\n", input, res);
2494         return res;
2495 }
2496
2497 #define MAX_QTAGUID_CTRL_INPUT_LEN 255
2498 static ssize_t qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer,
2499                                    size_t count, loff_t *offp)
2500 {
2501         char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN];
2502
2503         if (unlikely(module_passive))
2504                 return count;
2505
2506         if (count >= MAX_QTAGUID_CTRL_INPUT_LEN)
2507                 return -EINVAL;
2508
2509         if (copy_from_user(input_buf, buffer, count))
2510                 return -EFAULT;
2511
2512         input_buf[count] = '\0';
2513         return qtaguid_ctrl_parse(input_buf, count);
2514 }
2515
2516 struct proc_print_info {
2517         struct iface_stat *iface_entry;
2518         int item_index;
2519         tag_t tag; /* tag found by reading to tag_pos */
2520         loff_t tag_pos;
2521         int tag_item_index;
2522 };
2523
2524 static void pp_stats_header(struct seq_file *m)
2525 {
2526         seq_puts(m,
2527                  "idx iface acct_tag_hex uid_tag_int cnt_set "
2528                  "rx_bytes rx_packets "
2529                  "tx_bytes tx_packets "
2530                  "rx_tcp_bytes rx_tcp_packets "
2531                  "rx_udp_bytes rx_udp_packets "
2532                  "rx_other_bytes rx_other_packets "
2533                  "tx_tcp_bytes tx_tcp_packets "
2534                  "tx_udp_bytes tx_udp_packets "
2535                  "tx_other_bytes tx_other_packets\n");
2536 }
2537
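/*
 * Emits one stats line per (tag, counter_set) pair, in the column order
 * printed by pp_stats_header(); returns 0 when the line is suppressed
 * because the reader may not see another uid's detailed tags.
 */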
2538 static int pp_stats_line(struct seq_file *m, struct tag_stat *ts_entry,
2539                          int cnt_set)
2540 {
2541         int ret;
2542         struct data_counters *cnts;
2543         tag_t tag = ts_entry->tn.tag;
2544         uid_t stat_uid = get_uid_from_tag(tag);
2545         struct proc_print_info *ppi = m->private;
2546         /* Detailed tags are not available to everybody */
2547         if (get_atag_from_tag(tag) && !can_read_other_uid_stats(stat_uid)) {
2548                 CT_DEBUG("qtaguid: stats line: "
2549                          "%s 0x%llx %u: insufficient priv "
2550                          "from pid=%u tgid=%u uid=%u stats.gid=%u\n",
2551                          ppi->iface_entry->ifname,
2552                          get_atag_from_tag(tag), stat_uid,
2553                          current->pid, current->tgid,
2554                          __kuid_val(current_fsuid()),
2555                          __kgid_val(xt_qtaguid_stats_file->gid));
2556                 return 0;
2557         }
2558         ppi->item_index++;
2559         cnts = &ts_entry->counters;
2560         ret = seq_printf(m, "%d %s 0x%llx %u %u "
2561                 "%llu %llu "
2562                 "%llu %llu "
2563                 "%llu %llu "
2564                 "%llu %llu "
2565                 "%llu %llu "
2566                 "%llu %llu "
2567                 "%llu %llu "
2568                 "%llu %llu\n",
2569                 ppi->item_index,
2570                 ppi->iface_entry->ifname,
2571                 get_atag_from_tag(tag),
2572                 stat_uid,
2573                 cnt_set,
2574                 dc_sum_bytes(cnts, cnt_set, IFS_RX),
2575                 dc_sum_packets(cnts, cnt_set, IFS_RX),
2576                 dc_sum_bytes(cnts, cnt_set, IFS_TX),
2577                 dc_sum_packets(cnts, cnt_set, IFS_TX),
2578                 cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
2579                 cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
2580                 cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
2581                 cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
2582                 cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
2583                 cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
2584                 cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
2585                 cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
2586                 cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
2587                 cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
2588                 cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
2589                 cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
2590         return ret ?: 1;
2591 }
2592
2593 static bool pp_sets(struct seq_file *m, struct tag_stat *ts_entry)
2594 {
2595         int ret;
2596         int counter_set;
2597         for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS;
2598              counter_set++) {
2599                 ret = pp_stats_line(m, ts_entry, counter_set);
2600                 if (ret < 0)
2601                         return false;
2602         }
2603         return true;
2604 }
2605
2606 static int qtaguid_stats_proc_iface_stat_ptr_valid(struct iface_stat *ptr)
2607 {
2608         struct iface_stat *iface_entry;
2609
2610         if (!ptr)
2611                 return false;
2612
2613         list_for_each_entry(iface_entry, &iface_stat_list, list)
2614                 if (iface_entry == ptr)
2615                         return true;
2616         return false;
2617 }
2618
2619 static void qtaguid_stats_proc_next_iface_entry(struct proc_print_info *ppi)
2620 {
2621         spin_unlock_bh(&ppi->iface_entry->tag_stat_list_lock);
2622         list_for_each_entry_continue(ppi->iface_entry, &iface_stat_list, list) {
2623                 spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);
2624                 return;
2625         }
2626         ppi->iface_entry = NULL;
2627 }
2628
2629 static void *qtaguid_stats_proc_next(struct seq_file *m, void *v, loff_t *pos)
2630 {
2631         struct proc_print_info *ppi = m->private;
2632         struct tag_stat *ts_entry;
2633         struct rb_node *node;
2634
2635         if (!v) {
2636                 pr_err("qtaguid: %s(): unexpected v: NULL\n", __func__);
2637                 return NULL;
2638         }
2639
2640         (*pos)++;
2641
2642         if (!ppi->iface_entry || unlikely(module_passive))
2643                 return NULL;
2644
2645         if (v == SEQ_START_TOKEN)
2646                 node = rb_first(&ppi->iface_entry->tag_stat_tree);
2647         else
2648                 node = rb_next(&((struct tag_stat *)v)->tn.node);
2649
2650         while (!node) {
2651                 qtaguid_stats_proc_next_iface_entry(ppi);
2652                 if (!ppi->iface_entry)
2653                         return NULL;
2654                 node = rb_first(&ppi->iface_entry->tag_stat_tree);
2655         }
2656
2657         ts_entry = rb_entry(node, struct tag_stat, tn.node);
2658         ppi->tag = ts_entry->tn.tag;
2659         ppi->tag_pos = *pos;
2660         ppi->tag_item_index = ppi->item_index;
2661         return ts_entry;
2662 }
2663
2664 static void *qtaguid_stats_proc_start(struct seq_file *m, loff_t *pos)
2665 {
2666         struct proc_print_info *ppi = m->private;
2667         struct tag_stat *ts_entry = NULL;
2668
2669         spin_lock_bh(&iface_stat_list_lock);
2670
2671         if (*pos == 0) {
2672                 ppi->item_index = 1;
2673                 ppi->tag_pos = 0;
2674                 if (list_empty(&iface_stat_list)) {
2675                         ppi->iface_entry = NULL;
2676                 } else {
2677                         ppi->iface_entry = list_first_entry(&iface_stat_list,
2678                                                             struct iface_stat,
2679                                                             list);
2680                         spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);
2681                 }
2682                 return SEQ_START_TOKEN;
2683         }
2684         if (!qtaguid_stats_proc_iface_stat_ptr_valid(ppi->iface_entry)) {
2685                 if (ppi->iface_entry) {
2686                         pr_err("qtaguid: %s(): iface_entry %p not found\n",
2687                                __func__, ppi->iface_entry);
2688                         ppi->iface_entry = NULL;
2689                 }
2690                 return NULL;
2691         }
2692
2693         spin_lock_bh(&ppi->iface_entry->tag_stat_list_lock);
2694
2695         if (!ppi->tag_pos) {
2696                 /* seq_read skipped first next call */
2697                 ts_entry = SEQ_START_TOKEN;
2698         } else {
2699                 ts_entry = tag_stat_tree_search(
2700                                 &ppi->iface_entry->tag_stat_tree, ppi->tag);
2701                 if (!ts_entry) {
2702                         pr_info("qtaguid: %s(): tag_stat.tag 0x%llx not found. Abort.\n",
2703                                 __func__, ppi->tag);
2704                         return NULL;
2705                 }
2706         }
2707
2708         if (*pos == ppi->tag_pos) { /* normal resume */
2709                 ppi->item_index = ppi->tag_item_index;
2710         } else {
2711                 /* seq_read skipped a next call */
2712                 *pos = ppi->tag_pos;
2713                 ts_entry = qtaguid_stats_proc_next(m, ts_entry, pos);
2714         }
2715
2716         return ts_entry;
2717 }
2718
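/* seq_file ->stop handler: drop the locks taken by ->start. */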
2719 static void qtaguid_stats_proc_stop(struct seq_file *m, void *v)
2720 {
2721         struct proc_print_info *ppi = m->private;
2722         if (ppi->iface_entry)
2723                 spin_unlock_bh(&ppi->iface_entry->tag_stat_list_lock);
2724         spin_unlock_bh(&iface_stat_list_lock);
2725 }
2726
2727 /*
2728  * Procfs reader to get all tag stats using style "1)" as described in
2729  * fs/proc/generic.c
2730  * Groups tx/rx bytes across all protocols.
2731  */
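/*
 * Illustrative sketch of the resulting layout (not authoritative; see
 * pp_stats_header() and pp_sets() for the exact columns and values):
 *
 *   idx iface acct_tag_hex uid_tag_int cnt_set rx_bytes rx_packets
 *       tx_bytes tx_packets ...
 *   2 wlan0 0x0 10005 0 1234 10 567 8 ...
 */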
2732 static int qtaguid_stats_proc_show(struct seq_file *m, void *v)
2733 {
2734         struct tag_stat *ts_entry = v;
2735
2736         if (v == SEQ_START_TOKEN)
2737                 pp_stats_header(m);
2738         else
2739                 pp_sets(m, ts_entry);
2740
2741         return 0;
2742 }
2743
2744 /*------------------------------------------*/
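/*
 * open() handler for the /dev/xt_qtaguid misc device.  Registers the
 * calling process (keyed by tgid) so that sockets it tags can be cleaned
 * up on release: looks up or allocates the per-UID data and inserts a
 * proc_qtu_data node, all under uid_tag_data_tree_lock.  Only one open
 * per tgid is allowed; a second attempt gets -EBUSY.
 */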
2745 static int qtudev_open(struct inode *inode, struct file *file)
2746 {
2747         struct uid_tag_data *utd_entry;
2748         struct proc_qtu_data  *pqd_entry;
2749         struct proc_qtu_data  *new_pqd_entry;
2750         int res;
2751         bool utd_entry_found;
2752
2753         if (unlikely(qtu_proc_handling_passive))
2754                 return 0;
2755
2756         DR_DEBUG("qtaguid: qtudev_open(): pid=%u tgid=%u uid=%u\n",
2757                  current->pid, current->tgid, __kuid_val(current_fsuid()));
2758
2759         spin_lock_bh(&uid_tag_data_tree_lock);
2760
2761         /* Look for existing uid data, or alloc one. */
2762         utd_entry = get_uid_data(__kuid_val(current_fsuid()), &utd_entry_found);
2763         if (IS_ERR_OR_NULL(utd_entry)) {
2764                 res = PTR_ERR(utd_entry);
2765                 goto err_unlock;
2766         }
2767
2768         /* Look for existing PID based proc_data */
2769         pqd_entry = proc_qtu_data_tree_search(&proc_qtu_data_tree,
2770                                               current->tgid);
2771         if (pqd_entry) {
2772                 pr_err("qtaguid: qtudev_open(): %u/%u %u "
2773                        "%s already opened\n",
2774                        current->pid, current->tgid, __kuid_val(current_fsuid()),
2775                        QTU_DEV_NAME);
2776                 res = -EBUSY;
2777                 goto err_unlock_free_utd;
2778         }
2779
2780         new_pqd_entry = kzalloc(sizeof(*new_pqd_entry), GFP_ATOMIC);
2781         if (!new_pqd_entry) {
2782                 pr_err("qtaguid: qtudev_open(): %u/%u %u: "
2783                        "proc data alloc failed\n",
2784                        current->pid, current->tgid,
2785                        __kuid_val(current_fsuid()));
2786                 res = -ENOMEM;
2787                 goto err_unlock_free_utd;
2788         }
2789         new_pqd_entry->pid = current->tgid;
2790         INIT_LIST_HEAD(&new_pqd_entry->sock_tag_list);
2791         new_pqd_entry->parent_tag_data = utd_entry;
2792         utd_entry->num_pqd++;
2793
2794         proc_qtu_data_tree_insert(new_pqd_entry,
2795                                   &proc_qtu_data_tree);
2796
2797         spin_unlock_bh(&uid_tag_data_tree_lock);
2798         DR_DEBUG("qtaguid: tracking data for uid=%u in pqd=%p\n",
2799                  __kuid_val(current_fsuid()), new_pqd_entry);
2800         file->private_data = new_pqd_entry;
2801         return 0;
2802
2803 err_unlock_free_utd:
2804         if (!utd_entry_found) {
2805                 rb_erase(&utd_entry->node, &uid_tag_data_tree);
2806                 kfree(utd_entry);
2807         }
2808 err_unlock:
2809         spin_unlock_bh(&uid_tag_data_tree_lock);
2810         return res;
2811 }
2812
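/*
 * release() handler for /dev/xt_qtaguid: runs when the last reference to
 * the file goes away (possibly from a kworker rather than the tagging
 * process itself).  Unwinds every sock_tag the process left behind,
 * drops the matching tag_ref and uid_tag_data references, and frees the
 * proc_qtu_data node.  The sockets themselves are released outside the
 * spinlocks via sock_tag_tree_erase().
 */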
2813 static int qtudev_release(struct inode *inode, struct file *file)
2814 {
2815         struct proc_qtu_data  *pqd_entry = file->private_data;
2816         struct uid_tag_data  *utd_entry = pqd_entry->parent_tag_data;
2817         struct sock_tag *st_entry;
2818         struct rb_root st_to_free_tree = RB_ROOT;
2819         struct list_head *entry, *next;
2820         struct tag_ref *tr;
2821
2822         if (unlikely(qtu_proc_handling_passive))
2823                 return 0;
2824
2825         /*
2826          * Do not trust current->pid here; it might just be a kworker
2827          * cleaning up after a dead process.
2828          */
2829         DR_DEBUG("qtaguid: qtudev_release(): "
2830                  "pid=%u tgid=%u uid=%u "
2831                  "pqd_entry=%p->pid=%u utd_entry=%p->active_tags=%d\n",
2832                  current->pid, current->tgid, pqd_entry->parent_tag_data->uid,
2833                  pqd_entry, pqd_entry->pid, utd_entry,
2834                  utd_entry->num_active_tags);
2835
2836         spin_lock_bh(&sock_tag_list_lock);
2837         spin_lock_bh(&uid_tag_data_tree_lock);
2838
2839         list_for_each_safe(entry, next, &pqd_entry->sock_tag_list) {
2840                 st_entry = list_entry(entry, struct sock_tag, list);
2841                 DR_DEBUG("qtaguid: %s(): "
2842                          "erase sock_tag=%p->sk=%p pid=%u tgid=%u uid=%u\n",
2843                          __func__,
2844                          st_entry, st_entry->sk,
2845                          current->pid, current->tgid,
2846                          pqd_entry->parent_tag_data->uid);
2847
2848                 utd_entry = uid_tag_data_tree_search(
2849                         &uid_tag_data_tree,
2850                         get_uid_from_tag(st_entry->tag));
2851                 BUG_ON(IS_ERR_OR_NULL(utd_entry));
2852                 DR_DEBUG("qtaguid: %s(): "
2853                          "looking for tag=0x%llx in utd_entry=%p\n", __func__,
2854                          st_entry->tag, utd_entry);
2855                 tr = tag_ref_tree_search(&utd_entry->tag_ref_tree,
2856                                          st_entry->tag);
2857                 BUG_ON(!tr);
2858                 BUG_ON(tr->num_sock_tags <= 0);
2859                 tr->num_sock_tags--;
2860                 free_tag_ref_from_utd_entry(tr, utd_entry);
2861
2862                 rb_erase(&st_entry->sock_node, &sock_tag_tree);
2863                 list_del(&st_entry->list);
2864                 /* Can't sockfd_put() within spinlock, do it later. */
2865                 sock_tag_tree_insert(st_entry, &st_to_free_tree);
2866
2867                 /*
2868                  * Try to free the utd_entry if no other proc_qtu_data is
2869                  * using it (num_pqd is 0) and it doesn't have active tags
2870                  * (num_active_tags is 0).
2871                  */
2872                 put_utd_entry(utd_entry);
2873         }
2874
2875         rb_erase(&pqd_entry->node, &proc_qtu_data_tree);
2876         BUG_ON(pqd_entry->parent_tag_data->num_pqd < 1);
2877         pqd_entry->parent_tag_data->num_pqd--;
2878         put_utd_entry(pqd_entry->parent_tag_data);
2879         kfree(pqd_entry);
2880         file->private_data = NULL;
2881
2882         spin_unlock_bh(&uid_tag_data_tree_lock);
2883         spin_unlock_bh(&sock_tag_list_lock);
2884
2886         sock_tag_tree_erase(&st_to_free_tree);
2887
2888         prdebug_full_state(0, "%s(): pid=%u tgid=%u", __func__,
2889                            current->pid, current->tgid);
2890         return 0;
2891 }
2892
2893 /*------------------------------------------*/
2894 static const struct file_operations qtudev_fops = {
2895         .owner = THIS_MODULE,
2896         .open = qtudev_open,
2897         .release = qtudev_release,
2898 };
2899
2900 static struct miscdevice qtu_device = {
2901         .minor = MISC_DYNAMIC_MINOR,
2902         .name = QTU_DEV_NAME,
2903         .fops = &qtudev_fops,
2904         /* How sad it doesn't allow for defaults: .mode = S_IRUGO | S_IWUSR */
2905 };
2906
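/*
 * seq_file plumbing for /proc/net/xt_qtaguid/ctrl: the ctrl file is read
 * through these seqops and written through qtaguid_ctrl_proc_write(),
 * which handles the control commands (tagging/untagging sockets, etc.).
 */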
2907 static const struct seq_operations proc_qtaguid_ctrl_seqops = {
2908         .start = qtaguid_ctrl_proc_start,
2909         .next = qtaguid_ctrl_proc_next,
2910         .stop = qtaguid_ctrl_proc_stop,
2911         .show = qtaguid_ctrl_proc_show,
2912 };
2913
2914 static int proc_qtaguid_ctrl_open(struct inode *inode, struct file *file)
2915 {
2916         return seq_open_private(file, &proc_qtaguid_ctrl_seqops,
2917                                 sizeof(struct proc_ctrl_print_info));
2918 }
2919
2920 static const struct file_operations proc_qtaguid_ctrl_fops = {
2921         .open           = proc_qtaguid_ctrl_open,
2922         .read           = seq_read,
2923         .write          = qtaguid_ctrl_proc_write,
2924         .llseek         = seq_lseek,
2925         .release        = seq_release_private,
2926 };
2927
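/*
 * seq_file plumbing for /proc/net/xt_qtaguid/stats: read-only, iterated
 * by the qtaguid_stats_proc_*() handlers above.
 */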
2928 static const struct seq_operations proc_qtaguid_stats_seqops = {
2929         .start = qtaguid_stats_proc_start,
2930         .next = qtaguid_stats_proc_next,
2931         .stop = qtaguid_stats_proc_stop,
2932         .show = qtaguid_stats_proc_show,
2933 };
2934
2935 static int proc_qtaguid_stats_open(struct inode *inode, struct file *file)
2936 {
2937         return seq_open_private(file, &proc_qtaguid_stats_seqops,
2938                                 sizeof(struct proc_print_info));
2939 }
2940
2941 static const struct file_operations proc_qtaguid_stats_fops = {
2942         .open           = proc_qtaguid_stats_open,
2943         .read           = seq_read,
2944         .llseek         = seq_lseek,
2945         .release        = seq_release_private,
2946 };
2947
2948 /*------------------------------------------*/
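/*
 * Create /proc/net/xt_qtaguid/ with its "ctrl" and "stats" entries,
 * tearing down whatever was already created if any step fails.
 */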
2949 static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir)
2950 {
2951         int ret;
2952         *res_procdir = proc_mkdir(module_procdirname, init_net.proc_net);
2953         if (!*res_procdir) {
2954                 pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n");
2955                 ret = -ENOMEM;
2956                 goto no_dir;
2957         }
2958
2959         xt_qtaguid_ctrl_file = proc_create_data("ctrl", proc_ctrl_perms,
2960                                                 *res_procdir,
2961                                                 &proc_qtaguid_ctrl_fops,
2962                                                 NULL);
2963         if (!xt_qtaguid_ctrl_file) {
2964                 pr_err("qtaguid: failed to create xt_qtaguid/ctrl "
2965                        "file\n");
2966                 ret = -ENOMEM;
2967                 goto no_ctrl_entry;
2968         }
2969
2970         xt_qtaguid_stats_file = proc_create_data("stats", proc_stats_perms,
2971                                                  *res_procdir,
2972                                                  &proc_qtaguid_stats_fops,
2973                                                  NULL);
2974         if (!xt_qtaguid_stats_file) {
2975                 pr_err("qtaguid: failed to create xt_qtaguid/stats "
2976                         "file\n");
2977                 ret = -ENOMEM;
2978                 goto no_stats_entry;
2979         }
2980         /*
2981          * TODO: add support for counter hacking
2982          * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write;
2983          */
2984         return 0;
2985
2986 no_stats_entry:
2987         remove_proc_entry("ctrl", *res_procdir);
2988 no_ctrl_entry:
2989         remove_proc_entry(module_procdirname, init_net.proc_net);
2990 no_dir:
2991         return ret;
2992 }
2993
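/*
 * Illustrative userspace usage, assuming a standard iptables build with
 * the owner match (this is not a qtaguid-specific command):
 *
 *   iptables -A OUTPUT -m owner --uid-owner 10005 -j DROP
 *
 * The qtaguid-specific behaviour (socket tagging, per-tag accounting) is
 * driven through /proc/net/xt_qtaguid/ctrl rather than iptables options.
 */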
2994 static struct xt_match qtaguid_mt_reg __read_mostly = {
2995         /*
2996          * This module masquerades as the "owner" module so that iptables
2997          * tools can deal with it.
2998          */
2999         .name       = "owner",
3000         .revision   = 1,
3001         .family     = NFPROTO_UNSPEC,
3002         .match      = qtaguid_mt,
3003         .matchsize  = sizeof(struct xt_qtaguid_match_info),
3004         .me         = THIS_MODULE,
3005 };
3006
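/*
 * Module init: register the proc entries, interface stat tracking, the
 * xtables match and the misc device.  Failures are collapsed into a bare
 * -1 and earlier registrations are not rolled back; the module cannot be
 * unloaded anyway (see the TODO below).
 */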
3007 static int __init qtaguid_mt_init(void)
3008 {
3009         if (qtaguid_proc_register(&xt_qtaguid_procdir)
3010             || iface_stat_init(xt_qtaguid_procdir)
3011             || xt_register_match(&qtaguid_mt_reg)
3012             || misc_register(&qtu_device))
3013                 return -1;
3014         return 0;
3015 }
3016
3017 /*
3018  * TODO: allow unloading of the module.
3019  * For now stats are permanent.
3020  * Kconfig forces 'y/n' and never an 'm'.
3021  */
3022
3023 module_init(qtaguid_mt_init);
3024 MODULE_AUTHOR("jpa <jpa@google.com>");
3025 MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats");
3026 MODULE_LICENSE("GPL");
3027 MODULE_ALIAS("ipt_owner");
3028 MODULE_ALIAS("ip6t_owner");
3029 MODULE_ALIAS("ipt_qtaguid");
3030 MODULE_ALIAS("ip6t_qtaguid");