968693cb1bc07cdbbe07916e0e949e6f0d0bb152
[linux-2.6.git] / net / netfilter / xt_qtaguid.c
1 /*
2  * Kernel iptables module to track stats for packets based on user tags.
3  *
4  * (C) 2011 Google, Inc
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10
11 /* #define DEBUG */
12 /* #define IDEBUG */
13 /* #define MDEBUG */
14 /* #define RDEBUG */
15 /* #define CDEBUG */
16
/*
 * Per-area debug print helpers.  Each one forwards to pr_debug() when the
 * matching *DEBUG symbol is defined, and to no_printk() otherwise.
 */

/* Iface handling */
#ifndef IDEBUG
#define IF_DEBUG(...) no_printk(__VA_ARGS__)
#else
#define IF_DEBUG(...) pr_debug(__VA_ARGS__)
#endif

/* Iptable Matching */
#ifndef MDEBUG
#define MT_DEBUG(...) no_printk(__VA_ARGS__)
#else
#define MT_DEBUG(...) pr_debug(__VA_ARGS__)
#endif

/* Red-black tree handling */
#ifndef RDEBUG
#define RB_DEBUG(...) no_printk(__VA_ARGS__)
#else
#define RB_DEBUG(...) pr_debug(__VA_ARGS__)
#endif

/* procfs ctrl/stats handling */
#ifndef CDEBUG
#define CT_DEBUG(...) no_printk(__VA_ARGS__)
#else
#define CT_DEBUG(...) pr_debug(__VA_ARGS__)
#endif
41
42 #include <linux/file.h>
43 #include <linux/inetdevice.h>
44 #include <linux/module.h>
45 #include <linux/netfilter/x_tables.h>
46 #include <linux/netfilter/xt_qtaguid.h>
47 #include <linux/skbuff.h>
48 #include <linux/workqueue.h>
49 #include <net/addrconf.h>
50 #include <net/sock.h>
51 #include <net/tcp.h>
52 #include <net/udp.h>
53
54 #include <linux/netfilter/xt_socket.h>
55 /*
56  * We only use the xt_socket funcs within a similar context to avoid unexpected
57  * return values.
58  */
59 #define XT_SOCKET_SUPPORTED_HOOKS \
60         ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN))
61
62
63 static const char *module_procdirname = "xt_qtaguid";
64 static struct proc_dir_entry *xt_qtaguid_procdir;
65
66 static unsigned int proc_iface_perms = S_IRUGO;
67 module_param_named(iface_perms, proc_iface_perms, uint, S_IRUGO | S_IWUSR);
68
69 static struct proc_dir_entry *xt_qtaguid_stats_file;
70 static unsigned int proc_stats_perms = S_IRUGO;
71 module_param_named(stats_perms, proc_stats_perms, uint, S_IRUGO | S_IWUSR);
72
73 static struct proc_dir_entry *xt_qtaguid_ctrl_file;
74 #ifdef CONFIG_ANDROID_PARANOID_NETWORK
75 static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUGO;
76 #else
77 static unsigned int proc_ctrl_perms = S_IRUGO | S_IWUSR;
78 #endif
79 module_param_named(ctrl_perms, proc_ctrl_perms, uint, S_IRUGO | S_IWUSR);
80
81 #ifdef CONFIG_ANDROID_PARANOID_NETWORK
82 #include <linux/android_aid.h>
83 static gid_t proc_stats_readall_gid = AID_NET_BW_STATS;
84 static gid_t proc_ctrl_write_gid = AID_NET_BW_ACCT;
85 #else
86 /* 0 means, don't limit anybody */
87 static gid_t proc_stats_readall_gid;
88 static gid_t proc_ctrl_write_gid;
89 #endif
90 module_param_named(stats_readall_gid, proc_stats_readall_gid, uint,
91                    S_IRUGO | S_IWUSR);
92 module_param_named(ctrl_write_gid, proc_ctrl_write_gid, uint,
93                    S_IRUGO | S_IWUSR);
94
95 /*
96  * After the kernel has initialized this module, it is still possible
97  * to make it passive:
98  *  - do not register it via iptables.
99  *   the matching code will not be invoked.
100  *  - set passive to 0
101  *   the iface stats handling will not act on notifications.
102  * This is mostly useful when a bug is suspected.
103  */
104 static bool module_passive;
105 module_param_named(passive, module_passive, bool, S_IRUGO | S_IWUSR);
106
107 /*---------------------------------------------------------------------------*/
108 /*
109  * Tags:
110  *
111  * They represent what the data usage counters will be tracked against.
112  * By default a tag is just based on the UID.
113  * The UID is used as the base for policing, and cannot be ignored.
114  * So a tag will always at least represent a UID (uid_tag).
115  *
116  * A tag can be augmented with an "accounting tag" which is associated
117  * with a UID.
118  * User space can set the acct_tag portion of the tag which is then used
119  * with sockets: all data belonging to that socket will be counted against
120  * the tag. The policing is then based on the tag's uid_tag portion,
121  * and stats are collected for the acct_tag portion separately.
122  *
123  * There could be
124  * a:  {acct_tag=1, uid_tag=10003}
125  * b:  {acct_tag=2, uid_tag=10003}
126  * c:  {acct_tag=3, uid_tag=10003}
127  * d:  {acct_tag=0, uid_tag=10003}
128  * (a, b, and c represent tags associated with specific sockets.
129  * d is for the totals for that uid, including all untagged traffic.
130  * Typically d is used with policing/quota rules.)
131  *
132  * We want tag_t big enough to distinguish uid_t and acct_tag.
133  * It might become a struct if needed.
134  * Nothing should be using it as an int.
135  */
136 typedef uint64_t tag_t;  /* Only used via accessors */
137
138 static const char *iface_stat_procdirname = "iface_stat";
139 static struct proc_dir_entry *iface_stat_procdir;
140
141
142 /*
143  * For now we only track 2 sets of counters.
144  * The default set is 0.
145  * Userspace can activate another set for a given uid being tracked.
146  */
147 #define IFS_MAX_COUNTER_SETS 2
148
/* Traffic direction; used as the second index of data_counters.bpc. */
enum ifs_tx_rx {
        IFS_TX = 0,
        IFS_RX = 1,
        IFS_MAX_DIRECTIONS = 2  /* number of directions, not a direction */
};
154
/*
 * Protocol buckets: for now TCP, UDP, and everything else.
 * Used as the third index of data_counters.bpc.
 */
enum ifs_proto {
        IFS_TCP = 0,
        IFS_UDP = 1,
        IFS_PROTO_OTHER = 2,
        IFS_MAX_PROTOS = 3      /* number of buckets, not a bucket */
};
162
/* One pair of running totals: bytes seen and packets seen. */
struct byte_packet_counters {
        uint64_t bytes;         /* accumulated byte count */
        uint64_t packets;       /* accumulated packet count */
};
167
168 struct data_counters {
169         struct byte_packet_counters bpc[IFS_MAX_COUNTER_SETS][IFS_MAX_DIRECTIONS][IFS_MAX_PROTOS];
170 };
171
172 /* Generic tag based node used as a base for rb_tree ops. */
173 struct tag_node {
174         struct rb_node node;
175         tag_t tag;
176 };
177
178 struct tag_stat {
179         struct tag_node tn;
180         struct data_counters counters;
181         /*
182          * If this tag is acct_tag based, we need to count against the
183          * matching parent uid_tag.
184          */
185         struct data_counters *parent_counters;
186 };
187
188 struct iface_stat {
189         struct list_head list;
190         char *ifname;
191         uint64_t rx_bytes;
192         uint64_t rx_packets;
193         uint64_t tx_bytes;
194         uint64_t tx_packets;
195         bool active;
196         struct proc_dir_entry *proc_ptr;
197
198         struct rb_root tag_stat_tree;
199         spinlock_t tag_stat_list_lock;
200 };
201
202 static LIST_HEAD(iface_stat_list);
203 static DEFINE_SPINLOCK(iface_stat_list_lock);
204
205 /* This is needed to create proc_dir_entries from atomic context. */
206 struct iface_stat_work {
207         struct work_struct iface_work;
208         struct iface_stat *iface_entry;
209 };
210
211 /*
212  * Track tag that this socket is transferring data for, and not necessarily
213  * the uid that owns the socket.
214  * This is the tag against which tag_stat.counters will be billed.
215  */
216 struct sock_tag {
217         struct rb_node sock_node;
218         struct sock *sk;  /* Only used as a number, never dereferenced */
219         /* The socket is needed for sockfd_put() */
220         struct socket *socket;
221
222         tag_t tag;
223 };
224
225 static struct rb_root sock_tag_tree = RB_ROOT;
226 static DEFINE_SPINLOCK(sock_tag_list_lock);
227
228 /* Track the set active_set for the given tag. */
229 struct tag_counter_set {
230         struct tag_node tn;
231         int active_set;
232 };
233
234 static struct rb_root tag_counter_set_tree = RB_ROOT;
235 static DEFINE_SPINLOCK(tag_counter_set_list_lock);
236
237 static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par);
238
239 /*----------------------------------------------*/
240 static inline int tag_compare(tag_t t1, tag_t t2)
241 {
242         return t1 < t2 ? -1 : t1 == t2 ? 0 : 1;
243 }
244
245 static inline tag_t combine_atag_with_uid(tag_t acct_tag, uid_t uid)
246 {
247         return acct_tag | uid;
248 }
249 static inline tag_t make_tag_from_uid(uid_t uid)
250 {
251         return uid;
252 }
253 static inline uid_t get_uid_from_tag(tag_t tag)
254 {
255         return tag & 0xFFFFFFFFULL;
256 }
257 static inline tag_t get_utag_from_tag(tag_t tag)
258 {
259         return tag & 0xFFFFFFFFULL;
260 }
261 static inline tag_t get_atag_from_tag(tag_t tag)
262 {
263         return tag & ~0xFFFFFFFFULL;
264 }
265
266 static inline bool valid_atag(tag_t tag)
267 {
268         return !(tag & 0xFFFFFFFFULL);
269 }
270
271 static inline void dc_add_byte_packets(struct data_counters *counters, int set,
272                                   enum ifs_tx_rx direction,
273                                   enum ifs_proto ifs_proto,
274                                   int bytes,
275                                   int packets)
276 {
277         counters->bpc[set][direction][ifs_proto].bytes += bytes;
278         counters->bpc[set][direction][ifs_proto].packets += packets;
279 }
280
281 static inline uint64_t dc_sum_bytes(struct data_counters *counters,
282                                     int set,
283                                     enum ifs_tx_rx direction)
284 {
285         return counters->bpc[set][direction][IFS_TCP].bytes
286                 + counters->bpc[set][direction][IFS_UDP].bytes
287                 + counters->bpc[set][direction][IFS_PROTO_OTHER].bytes;
288 }
289
290 static inline uint64_t dc_sum_packets(struct data_counters *counters,
291                                       int set,
292                                       enum ifs_tx_rx direction)
293 {
294         return counters->bpc[set][direction][IFS_TCP].packets
295                 + counters->bpc[set][direction][IFS_UDP].packets
296                 + counters->bpc[set][direction][IFS_PROTO_OTHER].packets;
297 }
298
299 static struct tag_node *tag_node_tree_search(struct rb_root *root, tag_t tag)
300 {
301         struct rb_node *node = root->rb_node;
302
303         while (node) {
304                 struct tag_node *data = rb_entry(node, struct tag_node, node);
305                 int result = tag_compare(tag, data->tag);
306                 RB_DEBUG("qtaguid: tag_node_tree_search(): tag=0x%llx"
307                          " (uid=%d)\n",
308                          data->tag,
309                          get_uid_from_tag(data->tag));
310
311                 if (result < 0)
312                         node = node->rb_left;
313                 else if (result > 0)
314                         node = node->rb_right;
315                 else
316                         return data;
317         }
318         return NULL;
319 }
320
321 static void tag_node_tree_insert(struct tag_node *data, struct rb_root *root)
322 {
323         struct rb_node **new = &(root->rb_node), *parent = NULL;
324
325         /* Figure out where to put new node */
326         while (*new) {
327                 struct tag_node *this = rb_entry(*new, struct tag_node,
328                                                  node);
329                 int result = tag_compare(data->tag, this->tag);
330                 RB_DEBUG("qtaguid: tag_node_tree_insert(): tag=0x%llx"
331                          " (uid=%d)\n",
332                          this->tag,
333                          get_uid_from_tag(this->tag));
334                 parent = *new;
335                 if (result < 0)
336                         new = &((*new)->rb_left);
337                 else if (result > 0)
338                         new = &((*new)->rb_right);
339                 else
340                         BUG();
341         }
342
343         /* Add new node and rebalance tree. */
344         rb_link_node(&data->node, parent, new);
345         rb_insert_color(&data->node, root);
346 }
347
348 static void tag_stat_tree_insert(struct tag_stat *data, struct rb_root *root)
349 {
350         tag_node_tree_insert(&data->tn, root);
351 }
352
353 static struct tag_stat *tag_stat_tree_search(struct rb_root *root, tag_t tag)
354 {
355         struct tag_node *node = tag_node_tree_search(root, tag);
356         if (!node)
357                 return NULL;
358         return rb_entry(&node->node, struct tag_stat, tn.node);
359 }
360
361 static void tag_counter_set_tree_insert(struct tag_counter_set *data,
362                                         struct rb_root *root)
363 {
364         tag_node_tree_insert(&data->tn, root);
365 }
366
367 static struct tag_counter_set *tag_counter_set_tree_search(struct rb_root *root,
368                                                            tag_t tag)
369 {
370         struct tag_node *node = tag_node_tree_search(root, tag);
371         if (!node)
372                 return NULL;
373         return rb_entry(&node->node, struct tag_counter_set, tn.node);
374
375 }
376
377 static struct sock_tag *sock_tag_tree_search(struct rb_root *root,
378                                              const struct sock *sk)
379 {
380         struct rb_node *node = root->rb_node;
381
382         while (node) {
383                 struct sock_tag *data = rb_entry(node, struct sock_tag,
384                                                  sock_node);
385                 ptrdiff_t result = sk - data->sk;
386                 if (result < 0)
387                         node = node->rb_left;
388                 else if (result > 0)
389                         node = node->rb_right;
390                 else
391                         return data;
392         }
393         return NULL;
394 }
395
396 static void sock_tag_tree_insert(struct sock_tag *data, struct rb_root *root)
397 {
398         struct rb_node **new = &(root->rb_node), *parent = NULL;
399
400         /* Figure out where to put new node */
401         while (*new) {
402                 struct sock_tag *this = rb_entry(*new, struct sock_tag,
403                                                  sock_node);
404                 ptrdiff_t result = data->sk - this->sk;
405                 parent = *new;
406                 if (result < 0)
407                         new = &((*new)->rb_left);
408                 else if (result > 0)
409                         new = &((*new)->rb_right);
410                 else
411                         BUG();
412         }
413
414         /* Add new node and rebalance tree. */
415         rb_link_node(&data->sock_node, parent, new);
416         rb_insert_color(&data->sock_node, root);
417 }
418
/*
 * Legacy read_proc handler that prints one 64-bit counter as "<value>\n".
 *
 * @page:  buffer the text is formatted into
 * @start: set to @page + @off, the first byte still to be handed out
 * @off:   offset into the formatted text already consumed by the reader
 * @count: number of bytes the reader asked for
 * @eof:   set to 1 when the remaining text fits into @count, else 0
 * @data:  pointer to the uint64_t to print; NULL yields an empty read
 *
 * Returns the number of bytes available from @off on, never negative.
 */
static int read_proc_u64(char *page, char **start, off_t off,
                        int count, int *eof, void *data)
{
        int len;
        uint64_t value;
        char *p = page;
        uint64_t *iface_entry = data;

        if (!data)
                return 0;

        value = *iface_entry;
        /* uint64_t need not be unsigned long long; cast to match %llu. */
        p += sprintf(p, "%llu\n", (unsigned long long)value);
        len = (p - page) - off;
        *eof = (len <= count) ? 1 : 0;
        *start = page + off;
        /*
         * An offset past the end of the text must read as "nothing left",
         * not as a negative value that looks like an error code.
         */
        if (len < 0)
                len = 0;
        return len;
}
437
/*
 * Legacy read_proc handler that prints one bool as "0\n" or "1\n".
 *
 * @page:  buffer the text is formatted into
 * @start: set to @page + @off, the first byte still to be handed out
 * @off:   offset into the formatted text already consumed by the reader
 * @count: number of bytes the reader asked for
 * @eof:   set to 1 when the remaining text fits into @count, else 0
 * @data:  pointer to the bool to print; NULL yields an empty read
 *
 * Returns the number of bytes available from @off on, never negative.
 */
static int read_proc_bool(char *page, char **start, off_t off,
                        int count, int *eof, void *data)
{
        int len;
        bool value;
        char *p = page;
        bool *bool_entry = data;

        if (!data)
                return 0;

        value = *bool_entry;
        /* A bool promotes to int; cast so the argument matches %u. */
        p += sprintf(p, "%u\n", (unsigned int)value);
        len = (p - page) - off;
        *eof = (len <= count) ? 1 : 0;
        *start = page + off;
        /*
         * An offset past the end of the text must read as "nothing left",
         * not as a negative value that looks like an error code.
         */
        if (len < 0)
                len = 0;
        return len;
}
456
457 static int get_active_counter_set(tag_t tag)
458 {
459         int active_set = 0;
460         struct tag_counter_set *tcs;
461
462         MT_DEBUG("qtaguid: get_active_counter_set(tag=0x%llx)"
463                  " (uid=%u)\n",
464                  tag, get_uid_from_tag(tag));
465         /* For now we only handle UID tags for active sets */
466         tag = get_utag_from_tag(tag);
467         spin_lock_bh(&tag_counter_set_list_lock);
468         tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
469         if (tcs)
470                 active_set = tcs->active_set;
471         spin_unlock_bh(&tag_counter_set_list_lock);
472         return active_set;
473 }
474
475 /*
476  * Find the entry for tracking the specified interface.
477  * Caller must hold iface_stat_list_lock
478  */
479 static struct iface_stat *get_iface_entry(const char *ifname)
480 {
481         struct iface_stat *iface_entry;
482
483         /* Find the entry for tracking the specified tag within the interface */
484         if (ifname == NULL) {
485                 pr_info("qtaguid: iface_stat: get() NULL device name\n");
486                 return NULL;
487         }
488
489         /* Iterate over interfaces */
490         list_for_each_entry(iface_entry, &iface_stat_list, list) {
491                 if (!strcmp(ifname, iface_entry->ifname))
492                         goto done;
493         }
494         iface_entry = NULL;
495 done:
496         return iface_entry;
497 }
498
499 static void iface_create_proc_worker(struct work_struct *work)
500 {
501         struct proc_dir_entry *proc_entry;
502         struct iface_stat_work *isw = container_of(work, struct iface_stat_work,
503                                                    iface_work);
504         struct iface_stat *new_iface  = isw->iface_entry;
505
506         /* iface_entries are not deleted, so safe to manipulate. */
507         proc_entry = proc_mkdir(new_iface->ifname, iface_stat_procdir);
508         if (IS_ERR_OR_NULL(proc_entry)) {
509                 pr_err("qtaguid: iface_stat: create_proc(): alloc failed.\n");
510                 kfree(isw);
511                 return;
512         }
513
514         new_iface->proc_ptr = proc_entry;
515
516         create_proc_read_entry("tx_bytes", proc_iface_perms, proc_entry,
517                         read_proc_u64, &new_iface->tx_bytes);
518         create_proc_read_entry("rx_bytes", proc_iface_perms, proc_entry,
519                         read_proc_u64, &new_iface->rx_bytes);
520         create_proc_read_entry("tx_packets", proc_iface_perms, proc_entry,
521                         read_proc_u64, &new_iface->tx_packets);
522         create_proc_read_entry("rx_packets", proc_iface_perms, proc_entry,
523                         read_proc_u64, &new_iface->rx_packets);
524         create_proc_read_entry("active", proc_iface_perms, proc_entry,
525                         read_proc_bool, &new_iface->active);
526
527         IF_DEBUG("qtaguid: iface_stat: create_proc(): done "
528                  "entry=%p dev=%s\n", new_iface, new_iface->ifname);
529         kfree(isw);
530 }
531
532 /* Caller must hold iface_stat_list_lock */
533 static struct iface_stat *iface_alloc(const char *ifname)
534 {
535         struct iface_stat *new_iface;
536         struct iface_stat_work *isw;
537
538         new_iface = kzalloc(sizeof(*new_iface), GFP_ATOMIC);
539         if (new_iface == NULL) {
540                 pr_err("qtaguid: iface_stat: create(%s): "
541                        "iface_stat alloc failed\n", ifname);
542                 return NULL;
543         }
544         new_iface->ifname = kstrdup(ifname, GFP_ATOMIC);
545         if (new_iface->ifname == NULL) {
546                 pr_err("qtaguid: iface_stat: create(%s): "
547                        "ifname alloc failed\n", ifname);
548                 kfree(new_iface);
549                 return NULL;
550         }
551         spin_lock_init(&new_iface->tag_stat_list_lock);
552         new_iface->active = true;
553         new_iface->tag_stat_tree = RB_ROOT;
554
555         /*
556          * ipv6 notifier chains are atomic :(
557          * No create_proc_read_entry() for you!
558          */
559         isw = kmalloc(sizeof(*isw), GFP_ATOMIC);
560         if (!isw) {
561                 pr_err("qtaguid: iface_stat: create(%s): "
562                        "work alloc failed\n", new_iface->ifname);
563                 kfree(new_iface->ifname);
564                 kfree(new_iface);
565                 return NULL;
566         }
567         isw->iface_entry = new_iface;
568         INIT_WORK(&isw->iface_work, iface_create_proc_worker);
569         schedule_work(&isw->iface_work);
570         list_add(&new_iface->list, &iface_stat_list);
571         return new_iface;
572 }
573
574 /*
575  * Create a new entry for tracking the specified interface.
576  * Do nothing if the entry already exists.
577  * Called when an interface is configured with a valid IP address.
578  */
579 void iface_stat_create(const struct net_device *net_dev,
580                        struct in_ifaddr *ifa)
581 {
582         struct in_device *in_dev = NULL;
583         const char *ifname;
584         struct iface_stat *entry;
585         __be32 ipaddr = 0;
586         struct iface_stat *new_iface;
587
588         IF_DEBUG("qtaguid: iface_stat: create(%s): ifa=%p netdev=%p\n",
589                  net_dev ? net_dev->name : "?",
590                  ifa, net_dev);
591         if (!net_dev) {
592                 pr_err("qtaguid: iface_stat: create(): no net dev\n");
593                 return;
594         }
595
596         ifname = net_dev->name;
597         if (!ifa) {
598                 in_dev = in_dev_get(net_dev);
599                 if (!in_dev) {
600                         pr_err("qtaguid: iface_stat: create(%s): no inet dev\n",
601                                ifname);
602                         return;
603                 }
604                 IF_DEBUG("qtaguid: iface_stat: create(%s): in_dev=%p\n",
605                          ifname, in_dev);
606                 for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
607                         IF_DEBUG("qtaguid: iface_stat: create(%s): "
608                                  "ifa=%p ifa_label=%s\n",
609                                  ifname, ifa,
610                                  ifa->ifa_label ? ifa->ifa_label : "(null)");
611                         if (ifa->ifa_label && !strcmp(ifname, ifa->ifa_label))
612                                 break;
613                 }
614         }
615
616         if (!ifa) {
617                 IF_DEBUG("qtaguid: iface_stat: create(%s): no matching IP\n",
618                          ifname);
619                 goto done_put;
620         }
621         ipaddr = ifa->ifa_local;
622
623         spin_lock_bh(&iface_stat_list_lock);
624         entry = get_iface_entry(ifname);
625         if (entry != NULL) {
626                 IF_DEBUG("qtaguid: iface_stat: create(%s): entry=%p\n",
627                          ifname, entry);
628                 if (ipv4_is_loopback(ipaddr)) {
629                         entry->active = false;
630                         IF_DEBUG("qtaguid: iface_stat: create(%s): "
631                                  "disable tracking of loopback dev\n",
632                                  ifname);
633                 } else {
634                         entry->active = true;
635                         IF_DEBUG("qtaguid: iface_stat: create(%s): "
636                                  "enable tracking. ip=%pI4\n",
637                                  ifname, &ipaddr);
638                 }
639                 goto done_unlock_put;
640         } else if (ipv4_is_loopback(ipaddr)) {
641                 IF_DEBUG("qtaguid: iface_stat: create(%s): "
642                          "ignore loopback dev. ip=%pI4\n", ifname, &ipaddr);
643                 goto done_unlock_put;
644         }
645
646         new_iface = iface_alloc(ifname);
647         IF_DEBUG("qtaguid: iface_stat: create(%s): done "
648                  "entry=%p ip=%pI4\n", ifname, new_iface, &ipaddr);
649
650 done_unlock_put:
651         spin_unlock_bh(&iface_stat_list_lock);
652 done_put:
653         if (in_dev)
654                 in_dev_put(in_dev);
655 }
656
657 void iface_stat_create_ipv6(const struct net_device *net_dev,
658                             struct inet6_ifaddr *ifa)
659 {
660         struct in_device *in_dev;
661         const char *ifname;
662         struct iface_stat *entry;
663         struct iface_stat *new_iface;
664         int addr_type;
665
666         IF_DEBUG("qtaguid: iface_stat: create6(): ifa=%p netdev=%p->name=%s\n",
667                  ifa, net_dev, net_dev ? net_dev->name : "");
668         if (!net_dev) {
669                 pr_err("qtaguid: iface_stat: create6(): no net dev!\n");
670                 return;
671         }
672         ifname = net_dev->name;
673
674         in_dev = in_dev_get(net_dev);
675         if (!in_dev) {
676                 pr_err("qtaguid: iface_stat: create6(%s): no inet dev\n",
677                        ifname);
678                 return;
679         }
680
681         IF_DEBUG("qtaguid: iface_stat: create6(%s): in_dev=%p\n",
682                  ifname, in_dev);
683
684         if (!ifa) {
685                 IF_DEBUG("qtaguid: iface_stat: create6(%s): no matching IP\n",
686                          ifname);
687                 goto done_put;
688         }
689         addr_type = ipv6_addr_type(&ifa->addr);
690
691         spin_lock_bh(&iface_stat_list_lock);
692         entry = get_iface_entry(ifname);
693         if (entry != NULL) {
694                 IF_DEBUG("qtaguid: iface_stat: create6(%s): entry=%p\n",
695                          ifname, entry);
696                 if (addr_type & IPV6_ADDR_LOOPBACK) {
697                         entry->active = false;
698                         IF_DEBUG("qtaguid: iface_stat: create6(%s): "
699                                  "disable tracking of loopback dev\n",
700                                  ifname);
701                 } else {
702                         entry->active = true;
703                         IF_DEBUG("qtaguid: iface_stat: create6(%s): "
704                                  "enable tracking. ip=%pI6c\n",
705                                  ifname, &ifa->addr);
706                 }
707                 goto done_unlock_put;
708         } else if (addr_type & IPV6_ADDR_LOOPBACK) {
709                 IF_DEBUG("qtaguid: iface_stat: create6(%s): "
710                          "ignore loopback dev. ip=%pI6c\n",
711                          ifname, &ifa->addr);
712                 goto done_unlock_put;
713         }
714
715         new_iface = iface_alloc(ifname);
716         IF_DEBUG("qtaguid: iface_stat: create6(%s): done "
717                  "entry=%p ip=%pI6c\n", ifname, new_iface, &ifa->addr);
718
719 done_unlock_put:
720         spin_unlock_bh(&iface_stat_list_lock);
721 done_put:
722         in_dev_put(in_dev);
723 }
724
725 static struct sock_tag *get_sock_stat_nl(const struct sock *sk)
726 {
727         MT_DEBUG("qtaguid: get_sock_stat_nl(sk=%p)\n", sk);
728         return sock_tag_tree_search(&sock_tag_tree, sk);
729 }
730
731 static struct sock_tag *get_sock_stat(const struct sock *sk)
732 {
733         struct sock_tag *sock_tag_entry;
734         MT_DEBUG("qtaguid: get_sock_stat(sk=%p)\n", sk);
735         if (!sk)
736                 return NULL;
737         spin_lock_bh(&sock_tag_list_lock);
738         sock_tag_entry = get_sock_stat_nl(sk);
739         spin_unlock_bh(&sock_tag_list_lock);
740         return sock_tag_entry;
741 }
742
743 static void
744 data_counters_update(struct data_counters *dc, int set,
745                      enum ifs_tx_rx direction, int proto, int bytes)
746 {
747         switch (proto) {
748         case IPPROTO_TCP:
749                 dc_add_byte_packets(dc, set, direction, IFS_TCP, bytes, 1);
750                 break;
751         case IPPROTO_UDP:
752                 dc_add_byte_packets(dc, set, direction, IFS_UDP, bytes, 1);
753                 break;
754         case IPPROTO_IP:
755         default:
756                 dc_add_byte_packets(dc, set, direction, IFS_PROTO_OTHER, bytes,
757                                     1);
758                 break;
759         }
760 }
761
762 /*
763  * Update stats for the specified interface. Do nothing if the entry
764  * does not exist (when a device was never configured with an IP address).
765  * Called when an device is being unregistered.
766  */
767 static void iface_stat_update(struct net_device *dev)
768 {
769         struct rtnl_link_stats64 dev_stats, *stats;
770         struct iface_stat *entry;
771
772         stats = dev_get_stats(dev, &dev_stats);
773         spin_lock_bh(&iface_stat_list_lock);
774         entry = get_iface_entry(dev->name);
775         if (entry == NULL) {
776                 IF_DEBUG("qtaguid: iface_stat_update: dev=%s not tracked\n",
777                          dev->name);
778                 spin_unlock_bh(&iface_stat_list_lock);
779                 return;
780         }
781         IF_DEBUG("qtaguid: iface_stat_update: dev=%s entry=%p\n",
782                  dev->name, entry);
783         if (entry->active) {
784                 entry->tx_bytes += stats->tx_bytes;
785                 entry->tx_packets += stats->tx_packets;
786                 entry->rx_bytes += stats->rx_bytes;
787                 entry->rx_packets += stats->rx_packets;
788                 entry->active = false;
789         } else {
790                 IF_DEBUG("qtaguid: iface_stat_update: dev=%s inactive\n",
791                         dev->name);
792         }
793         spin_unlock_bh(&iface_stat_list_lock);
794 }
795
796 static void tag_stat_update(struct tag_stat *tag_entry,
797                         enum ifs_tx_rx direction, int proto, int bytes)
798 {
799         int active_set;
800         active_set = get_active_counter_set(tag_entry->tn.tag);
801         MT_DEBUG("qtaguid: tag_stat_update(tag=0x%llx (uid=%u) set=%d "
802                  "dir=%d proto=%d bytes=%d)\n",
803                  tag_entry->tn.tag, get_uid_from_tag(tag_entry->tn.tag),
804                  active_set, direction, proto, bytes);
805         data_counters_update(&tag_entry->counters, active_set, direction,
806                              proto, bytes);
807         if (tag_entry->parent_counters)
808                 data_counters_update(tag_entry->parent_counters, active_set,
809                                      direction, proto, bytes);
810 }
811
812 /*
813  * Create a new entry for tracking the specified {acct_tag,uid_tag} within
814  * the interface.
815  * iface_entry->tag_stat_list_lock should be held.
816  */
817 static struct tag_stat *create_if_tag_stat(struct iface_stat *iface_entry,
818                                            tag_t tag)
819 {
820         struct tag_stat *new_tag_stat_entry = NULL;
821         IF_DEBUG("qtaguid: iface_stat: create_if_tag_stat(): ife=%p tag=0x%llx"
822                  " (uid=%u)\n",
823                  iface_entry, tag, get_uid_from_tag(tag));
824         new_tag_stat_entry = kzalloc(sizeof(*new_tag_stat_entry), GFP_ATOMIC);
825         if (!new_tag_stat_entry) {
826                 pr_err("qtaguid: iface_stat: tag stat alloc failed\n");
827                 goto done;
828         }
829         new_tag_stat_entry->tn.tag = tag;
830         tag_stat_tree_insert(new_tag_stat_entry, &iface_entry->tag_stat_tree);
831 done:
832         return new_tag_stat_entry;
833 }
834
835 static void if_tag_stat_update(const char *ifname, uid_t uid,
836                                const struct sock *sk, enum ifs_tx_rx direction,
837                                int proto, int bytes)
838 {
839         struct tag_stat *tag_stat_entry;
840         tag_t tag, acct_tag;
841         tag_t uid_tag;
842         struct data_counters *uid_tag_counters;
843         struct sock_tag *sock_tag_entry;
844         struct iface_stat *iface_entry;
845         struct tag_stat *new_tag_stat;
846         MT_DEBUG("qtaguid: if_tag_stat_update(ifname=%s "
847                 "uid=%u sk=%p dir=%d proto=%d bytes=%d)\n",
848                  ifname, uid, sk, direction, proto, bytes);
849
850
851         iface_entry = get_iface_entry(ifname);
852         if (!iface_entry) {
853                 pr_err("qtaguid: iface_stat: stat_update() %s not found\n",
854                        ifname);
855                 return;
856         }
857         /* It is ok to process data when an iface_entry is inactive */
858
859         MT_DEBUG("qtaguid: iface_stat: stat_update() dev=%s entry=%p\n",
860                  ifname, iface_entry);
861
862         /*
863          * Look for a tagged sock.
864          * It will have an acct_uid.
865          */
866         sock_tag_entry = get_sock_stat(sk);
867         if (sock_tag_entry) {
868                 tag = sock_tag_entry->tag;
869                 acct_tag = get_atag_from_tag(tag);
870                 uid_tag = get_utag_from_tag(tag);
871         } else {
872                 uid_tag = make_tag_from_uid(uid);
873                 acct_tag = 0;
874                 tag = combine_atag_with_uid(acct_tag, uid);
875         }
876         MT_DEBUG("qtaguid: iface_stat: stat_update(): "
877                  " looking for tag=0x%llx (uid=%u) in ife=%p\n",
878                  tag, get_uid_from_tag(tag), iface_entry);
879         /* Loop over tag list under this interface for {acct_tag,uid_tag} */
880         spin_lock_bh(&iface_entry->tag_stat_list_lock);
881
882         tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
883                                               tag);
884         if (tag_stat_entry) {
885                 /*
886                  * Updating the {acct_tag, uid_tag} entry handles both stats:
887                  * {0, uid_tag} will also get updated.
888                  */
889                 tag_stat_update(tag_stat_entry, direction, proto, bytes);
890                 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
891                 return;
892         }
893
894         /* Loop over tag list under this interface for {0,uid_tag} */
895         tag_stat_entry = tag_stat_tree_search(&iface_entry->tag_stat_tree,
896                                               uid_tag);
897         if (!tag_stat_entry) {
898                 /* Here: the base uid_tag did not exist */
899                 /*
900                  * No parent counters. So
901                  *  - No {0, uid_tag} stats and no {acc_tag, uid_tag} stats.
902                  */
903                 new_tag_stat = create_if_tag_stat(iface_entry, uid_tag);
904                 uid_tag_counters = &new_tag_stat->counters;
905         } else {
906                 uid_tag_counters = &tag_stat_entry->counters;
907         }
908
909         if (acct_tag) {
910                 new_tag_stat = create_if_tag_stat(iface_entry, tag);
911                 new_tag_stat->parent_counters = uid_tag_counters;
912         }
913         spin_unlock_bh(&iface_entry->tag_stat_list_lock);
914         tag_stat_update(new_tag_stat, direction, proto, bytes);
915 }
916
917 static int iface_netdev_event_handler(struct notifier_block *nb,
918                                       unsigned long event, void *ptr) {
919         struct net_device *dev = ptr;
920
921         if (unlikely(module_passive))
922                 return NOTIFY_DONE;
923
924         IF_DEBUG("qtaguid: iface_stat: netdev_event(): "
925                  "ev=0x%lx netdev=%p->name=%s\n",
926                  event, dev, dev ? dev->name : "");
927
928         switch (event) {
929         case NETDEV_UP:
930                 iface_stat_create(dev, NULL);
931                 break;
932         case NETDEV_UNREGISTER:
933                 iface_stat_update(dev);
934                 break;
935         }
936         return NOTIFY_DONE;
937 }
938
939 static int iface_inet6addr_event_handler(struct notifier_block *nb,
940                                          unsigned long event, void *ptr)
941 {
942         struct inet6_ifaddr *ifa = ptr;
943         struct net_device *dev;
944
945         if (unlikely(module_passive))
946                 return NOTIFY_DONE;
947
948         IF_DEBUG("qtaguid: iface_stat: inet6addr_event(): "
949                  "ev=0x%lx ifa=%p\n",
950                  event, ifa);
951
952         switch (event) {
953         case NETDEV_UP:
954                 BUG_ON(!ifa || !ifa->idev);
955                 dev = (struct net_device *)ifa->idev->dev;
956                 iface_stat_create_ipv6(dev, ifa);
957                 break;
958         }
959         return NOTIFY_DONE;
960 }
961
962 static int iface_inetaddr_event_handler(struct notifier_block *nb,
963                                         unsigned long event, void *ptr)
964 {
965         struct in_ifaddr *ifa = ptr;
966         struct net_device *dev;
967
968         if (unlikely(module_passive))
969                 return NOTIFY_DONE;
970
971         IF_DEBUG("qtaguid: iface_stat: inetaddr_event(): "
972                  "ev=0x%lx ifa=%p\n",
973                  event, ifa);
974
975         switch (event) {
976         case NETDEV_UP:
977                 BUG_ON(!ifa || !ifa->ifa_dev);
978                 dev = ifa->ifa_dev->dev;
979                 iface_stat_create(dev, ifa);
980                 break;
981         }
982         return NOTIFY_DONE;
983 }
984
/*
 * Notifier block for net device events; registered by iface_stat_init()
 * through register_netdevice_notifier().
 */
static struct notifier_block iface_netdev_notifier_blk = {
        .notifier_call = iface_netdev_event_handler,
};

/*
 * Notifier block for IPv4 address events; registered by iface_stat_init()
 * through register_inetaddr_notifier().
 */
static struct notifier_block iface_inetaddr_notifier_blk = {
        .notifier_call = iface_inetaddr_event_handler,
};

/*
 * Notifier block for IPv6 address events; registered by iface_stat_init()
 * through register_inet6addr_notifier().
 */
static struct notifier_block iface_inet6addr_notifier_blk = {
        .notifier_call = iface_inet6addr_event_handler,
};
996
997 static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
998 {
999         int err;
1000
1001         iface_stat_procdir = proc_mkdir(iface_stat_procdirname, parent_procdir);
1002         if (!iface_stat_procdir) {
1003                 pr_err("qtaguid: iface_stat: init failed to create proc entry\n");
1004                 err = -1;
1005                 goto err;
1006         }
1007         err = register_netdevice_notifier(&iface_netdev_notifier_blk);
1008         if (err) {
1009                 pr_err("qtaguid: iface_stat: init "
1010                        "failed to register dev event handler\n");
1011                 goto err_zap_entry;
1012         }
1013         err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1014         if (err) {
1015                 pr_err("qtaguid: iface_stat: init "
1016                        "failed to register ipv4 dev event handler\n");
1017                 goto err_unreg_nd;
1018         }
1019
1020         err = register_inet6addr_notifier(&iface_inet6addr_notifier_blk);
1021         if (err) {
1022                 pr_err("qtaguid: iface_stat: init "
1023                        "failed to register ipv6 dev event handler\n");
1024                 goto err_unreg_ip4_addr;
1025         }
1026         return 0;
1027
1028 err_unreg_ip4_addr:
1029         unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk);
1030 err_unreg_nd:
1031         unregister_netdevice_notifier(&iface_netdev_notifier_blk);
1032 err_zap_entry:
1033         remove_proc_entry(iface_stat_procdirname, parent_procdir);
1034 err:
1035         return err;
1036 }
1037
1038 static struct sock *qtaguid_find_sk(const struct sk_buff *skb,
1039                                     struct xt_action_param *par)
1040 {
1041         struct sock *sk;
1042         unsigned int hook_mask = (1 << par->hooknum);
1043
1044         MT_DEBUG("qtaguid: find_sk(skb=%p) hooknum=%d family=%d\n", skb,
1045                  par->hooknum, par->family);
1046
1047         /*
1048          * Let's not abuse the the xt_socket_get*_sk(), or else it will
1049          * return garbage SKs.
1050          */
1051         if (!(hook_mask & XT_SOCKET_SUPPORTED_HOOKS))
1052                 return NULL;
1053
1054         switch (par->family) {
1055         case NFPROTO_IPV6:
1056                 sk = xt_socket_get6_sk(skb, par);
1057                 break;
1058         case NFPROTO_IPV4:
1059                 sk = xt_socket_get4_sk(skb, par);
1060                 break;
1061         default:
1062                 return NULL;
1063         }
1064
1065         /*
1066          * Seems to be issues on the file ptr for TCP_TIME_WAIT SKs.
1067          * http://kerneltrap.org/mailarchive/linux-netdev/2010/10/21/6287959
1068          * Not fixed in 3.0-r3 :(
1069          */
1070         if (sk) {
1071                 MT_DEBUG("qtaguid: %p->sk_proto=%u "
1072                          "->sk_state=%d\n", sk, sk->sk_protocol, sk->sk_state);
1073                 if (sk->sk_state  == TCP_TIME_WAIT) {
1074                         xt_socket_put_sk(sk);
1075                         sk = NULL;
1076                 }
1077         }
1078         return sk;
1079 }
1080
1081 static void account_for_uid(const struct sk_buff *skb,
1082                             const struct sock *alternate_sk, uid_t uid,
1083                             struct xt_action_param *par)
1084 {
1085         const struct net_device *el_dev;
1086
1087         if (!skb->dev) {
1088                 MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
1089                 el_dev = par->in ? : par->out;
1090         } else {
1091                 const struct net_device *other_dev;
1092                 el_dev = skb->dev;
1093                 other_dev = par->in ? : par->out;
1094                 if (el_dev != other_dev) {
1095                         MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
1096                                 "par->(in/out)=%p %s\n",
1097                                 par->hooknum, el_dev, el_dev->name, other_dev,
1098                                 other_dev->name);
1099                 }
1100         }
1101
1102         if (unlikely(!el_dev)) {
1103                 pr_info("qtaguid[%d]: no par->in/out?!!\n", par->hooknum);
1104         } else if (unlikely(!el_dev->name)) {
1105                 pr_info("qtaguid[%d]: no dev->name?!!\n", par->hooknum);
1106         } else {
1107                 MT_DEBUG("qtaguid[%d]: dev name=%s type=%d\n",
1108                          par->hooknum,
1109                          el_dev->name,
1110                          el_dev->type);
1111
1112                 if_tag_stat_update(el_dev->name, uid,
1113                                 skb->sk ? skb->sk : alternate_sk,
1114                                 par->in ? IFS_RX : IFS_TX,
1115                                 ip_hdr(skb)->protocol, skb->len);
1116         }
1117 }
1118
/*
 * Match callback for the qtaguid match.
 *
 * Decides whether the packet matches the uid/gid/socket criteria in
 * par->matchinfo and, as a side effect, accounts the packet through
 * account_for_uid() whenever the rule does not request a uid match
 * (XT_QTAGUID_UID not set in info->match).
 *
 * The owning socket is taken from skb->sk, or looked up with
 * qtaguid_find_sk() when the skb has none; a socket obtained from that lookup
 * is released with xt_socket_put_sk() before returning.
 *
 * While the module is passive, or when there is no skb, the result is simply
 * whether info->match and info->invert are equal.
 */
static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
        const struct xt_qtaguid_match_info *info = par->matchinfo;
        const struct file *filp;
        bool got_sock = false;
        struct sock *sk;
        uid_t sock_uid;
        bool res;

        if (unlikely(module_passive))
                return (info->match ^ info->invert) == 0;

        MT_DEBUG("qtaguid[%d]: entered skb=%p par->in=%p/out=%p fam=%d\n",
                 par->hooknum, skb, par->in, par->out, par->family);

        if (skb == NULL) {
                res = (info->match ^ info->invert) == 0;
                goto ret_res;
        }

        sk = skb->sk;

        if (sk == NULL) {
                /*
                 * A missing sk->sk_socket happens when packets are in-flight
                 * and the matching socket is already closed and gone.
                 */
                sk = qtaguid_find_sk(skb, par);
                /*
                 * If we got the socket from the find_sk(), we will need to put
                 * it back, as nf_tproxy_get_sock_v4() got it.
                 */
                got_sock = sk;
        }
        MT_DEBUG("qtaguid[%d]: sk=%p got_sock=%d proto=%d\n",
                par->hooknum, sk, got_sock, ip_hdr(skb)->protocol);
        /* This block only feeds the debug traces; filp is re-read below. */
        if (sk != NULL) {
                MT_DEBUG("qtaguid[%d]: sk=%p->sk_socket=%p->file=%p\n",
                        par->hooknum, sk, sk->sk_socket,
                        sk->sk_socket ? sk->sk_socket->file : (void *)-1LL);
                filp = sk->sk_socket ? sk->sk_socket->file : NULL;
                MT_DEBUG("qtaguid[%d]: filp...uid=%u\n",
                        par->hooknum, filp ? filp->f_cred->fsuid : -1);
        }

        if (sk == NULL || sk->sk_socket == NULL) {
                /*
                 * Here, the qtaguid_find_sk() using connection tracking
                 * couldn't find the owner, so for now we just count them
                 * against the system.
                 */
                /*
                 * TODO: unhack how to force just accounting.
                 * For now we only do iface stats when the uid-owner is not
                 * requested.
                 */
                if (!(info->match & XT_QTAGUID_UID))
                        account_for_uid(skb, sk, 0, par);
                MT_DEBUG("qtaguid[%d]: leaving (sk?sk->sk_socket)=%p\n",
                        par->hooknum,
                        sk ? sk->sk_socket : NULL);
                res = (info->match ^ info->invert) == 0;
                goto put_sock_ret_res;
        } else if (info->match & info->invert & XT_QTAGUID_SOCKET) {
                /* A socket exists but the rule asked for "no socket". */
                res = false;
                goto put_sock_ret_res;
        }
        filp = sk->sk_socket->file;
        if (filp == NULL) {
                /*
                 * No file means no credentials to compare: match only if no
                 * non-inverted uid/gid test was requested.
                 */
                MT_DEBUG("qtaguid[%d]: leaving filp=NULL\n", par->hooknum);
                res = ((info->match ^ info->invert) &
                        (XT_QTAGUID_UID | XT_QTAGUID_GID)) == 0;
                goto put_sock_ret_res;
        }
        sock_uid = filp->f_cred->fsuid;
        /*
         * TODO: unhack how to force just accounting.
         * For now we only do iface stats when the uid-owner is not requested
         */
        if (!(info->match & XT_QTAGUID_UID))
                account_for_uid(skb, sk, sock_uid, par);

        /*
         * The following two tests fail the match when:
         *    id not in range AND no inverted condition requested
         * or id     in range AND    inverted condition requested
         * Thus (!a && b) || (a && !b) == a ^ b
         */
        if (info->match & XT_QTAGUID_UID)
                if ((filp->f_cred->fsuid >= info->uid_min &&
                     filp->f_cred->fsuid <= info->uid_max) ^
                    !(info->invert & XT_QTAGUID_UID)) {
                        MT_DEBUG("qtaguid[%d]: leaving uid not matching\n",
                                 par->hooknum);
                        res = false;
                        goto put_sock_ret_res;
                }
        if (info->match & XT_QTAGUID_GID)
                if ((filp->f_cred->fsgid >= info->gid_min &&
                                filp->f_cred->fsgid <= info->gid_max) ^
                        !(info->invert & XT_QTAGUID_GID)) {
                        MT_DEBUG("qtaguid[%d]: leaving gid not matching\n",
                                par->hooknum);
                        res = false;
                        goto put_sock_ret_res;
                }

        MT_DEBUG("qtaguid[%d]: leaving matched\n", par->hooknum);
        res = true;

put_sock_ret_res:
        /* Only drop the reference we took ourselves in qtaguid_find_sk(). */
        if (got_sock)
                xt_socket_put_sk(sk);
ret_res:
        MT_DEBUG("qtaguid[%d]: left %d\n", par->hooknum, res);
        return res;
}
1236
1237 /*
1238  * Procfs reader to get all active socket tags using style "1)" as described in
1239  * fs/proc/generic.c
1240  */
1241 static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned,
1242                                   off_t items_to_skip, int char_count, int *eof,
1243                                   void *data)
1244 {
1245         char *outp = page;
1246         int len;
1247         uid_t uid;
1248         struct sock_tag *sock_tag_entry;
1249         struct rb_node *node;
1250         int item_index = 0;
1251
1252         if (unlikely(module_passive)) {
1253                 *eof = 1;
1254                 return 0;
1255         }
1256
1257         CT_DEBUG("qtaguid: proc ctrl page=%p off=%ld char_count=%d *eof=%d\n",
1258                 page, items_to_skip, char_count, *eof);
1259
1260         if (*eof)
1261                 return 0;
1262
1263         spin_lock_bh(&sock_tag_list_lock);
1264         for (node = rb_first(&sock_tag_tree);
1265              node;
1266              node = rb_next(node)) {
1267                 if (item_index++ < items_to_skip)
1268                         continue;
1269                 sock_tag_entry = rb_entry(node, struct sock_tag, sock_node);
1270                 uid = get_uid_from_tag(sock_tag_entry->tag);
1271                 CT_DEBUG("qtaguid: proc_read(): sk=%p tag=0x%llx (uid=%u)\n",
1272                          sock_tag_entry->sk,
1273                          sock_tag_entry->tag,
1274                          uid
1275                         );
1276                 len = snprintf(outp, char_count,
1277                                "sock=%p tag=0x%llx (uid=%u)\n",
1278                                sock_tag_entry->sk, sock_tag_entry->tag, uid);
1279                 if (len >= char_count) {
1280                         spin_unlock_bh(&sock_tag_list_lock);
1281                         *outp = '\0';
1282                         return outp - page;
1283                 }
1284                 outp += len;
1285                 char_count -= len;
1286                 (*num_items_returned)++;
1287         }
1288         spin_unlock_bh(&sock_tag_list_lock);
1289         *eof = 1;
1290         return outp - page;
1291 }
1292
1293 static bool can_manipulate_uids(void)
1294 {
1295         /* root pwnd */
1296         return unlikely(!current_fsuid()) || unlikely(!proc_ctrl_write_gid)
1297                 || in_egroup_p(proc_ctrl_write_gid);
1298 }
1299
1300 static bool can_impersonate_uid(uid_t uid)
1301 {
1302         return uid == current_fsuid() || can_manipulate_uids();
1303 }
1304
1305 static bool can_read_other_uid_stats(uid_t uid)
1306 {
1307         /* root pwnd */
1308         return unlikely(!current_fsuid()) || uid == current_fsuid()
1309                 || unlikely(!proc_stats_readall_gid)
1310                 || in_egroup_p(proc_stats_readall_gid);
1311 }
1312
1313 /*
1314  * Delete socket tags, and stat tags associated with a given
1315  * accouting tag and uid.
1316  */
1317 static int ctrl_cmd_delete(const char *input)
1318 {
1319         char cmd;
1320         uid_t uid;
1321         uid_t entry_uid;
1322         tag_t acct_tag;
1323         tag_t tag;
1324         int res, argc;
1325         struct iface_stat *iface_entry;
1326         struct rb_node *node;
1327         struct sock_tag *st_entry;
1328         struct rb_root st_to_free_tree = RB_ROOT;
1329         struct tag_stat *ts_entry;
1330         struct tag_counter_set *tcs_entry;
1331
1332         argc = sscanf(input, "%c %llu %u", &cmd, &acct_tag, &uid);
1333         CT_DEBUG("qtaguid: ctrl_delete(%s): argc=%d cmd=%c "
1334                  "user_tag=0x%llx uid=%u\n", input, argc, cmd,
1335                  acct_tag, uid);
1336         if (argc < 2) {
1337                 res = -EINVAL;
1338                 goto err;
1339         }
1340         if (!valid_atag(acct_tag)) {
1341                 pr_info("qtaguid: ctrl_delete(%s): invalid tag\n", input);
1342                 res = -EINVAL;
1343                 goto err;
1344         }
1345         if (argc < 3) {
1346                 uid = current_fsuid();
1347         } else if (!can_impersonate_uid(uid)) {
1348                 pr_info("qtaguid: ctrl_delete(%s): "
1349                         "insufficient priv from pid=%u uid=%u\n",
1350                         input, current->pid, current_fsuid());
1351                 res = -EPERM;
1352                 goto err;
1353         }
1354
1355         /* Delete socket tags */
1356         spin_lock_bh(&sock_tag_list_lock);
1357         node = rb_first(&sock_tag_tree);
1358         while (node) {
1359                 st_entry = rb_entry(node, struct sock_tag, sock_node);
1360                 entry_uid = get_uid_from_tag(st_entry->tag);
1361                 node = rb_next(node);
1362                 if (entry_uid != uid)
1363                         continue;
1364
1365                 if (!acct_tag || st_entry->tag == tag) {
1366                         rb_erase(&st_entry->sock_node, &sock_tag_tree);
1367                         /* Can't sockfd_put() within spinlock, do it later. */
1368                         sock_tag_tree_insert(st_entry, &st_to_free_tree);
1369                 }
1370         }
1371         spin_unlock_bh(&sock_tag_list_lock);
1372
1373         node = rb_first(&st_to_free_tree);
1374         while (node) {
1375                 st_entry = rb_entry(node, struct sock_tag, sock_node);
1376                 node = rb_next(node);
1377                 CT_DEBUG("qtaguid: ctrl_delete(): "
1378                          "erase st: sk=%p tag=0x%llx (uid=%u)\n",
1379                          st_entry->sk,
1380                          st_entry->tag,
1381                          entry_uid);
1382                 rb_erase(&st_entry->sock_node, &st_to_free_tree);
1383                 sockfd_put(st_entry->socket);
1384                 kfree(st_entry);
1385         }
1386
1387         tag = combine_atag_with_uid(acct_tag, uid);
1388
1389         /* Delete tag counter-sets */
1390         spin_lock_bh(&tag_counter_set_list_lock);
1391         tcs_entry = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
1392         if (tcs_entry) {
1393                 CT_DEBUG("qtaguid: ctrl_delete(): "
1394                          "erase tcs: tag=0x%llx (uid=%u) set=%d\n",
1395                          tcs_entry->tn.tag,
1396                          get_uid_from_tag(tcs_entry->tn.tag),
1397                          tcs_entry->active_set);
1398                 rb_erase(&tcs_entry->tn.node, &tag_counter_set_tree);
1399                 kfree(tcs_entry);
1400         }
1401         spin_unlock_bh(&tag_counter_set_list_lock);
1402
1403         /*
1404          * If acct_tag is 0, then all entries belonging to uid are
1405          * erased.
1406          */
1407         spin_lock_bh(&iface_stat_list_lock);
1408         list_for_each_entry(iface_entry, &iface_stat_list, list) {
1409                 spin_lock_bh(&iface_entry->tag_stat_list_lock);
1410                 node = rb_first(&iface_entry->tag_stat_tree);
1411                 while (node) {
1412                         ts_entry = rb_entry(node, struct tag_stat, tn.node);
1413                         entry_uid = get_uid_from_tag(ts_entry->tn.tag);
1414                         node = rb_next(node);
1415                         if (entry_uid != uid)
1416                                 continue;
1417                         if (!acct_tag || ts_entry->tn.tag == tag) {
1418                                 CT_DEBUG("qtaguid: ctrl_delete(): "
1419                                          "erase ts: %s 0x%llx %u\n",
1420                                          iface_entry->ifname,
1421                                          get_atag_from_tag(ts_entry->tn.tag),
1422                                          entry_uid);
1423                                 rb_erase(&ts_entry->tn.node,
1424                                          &iface_entry->tag_stat_tree);
1425                                 kfree(ts_entry);
1426                         }
1427                 }
1428                 spin_unlock_bh(&iface_entry->tag_stat_list_lock);
1429         }
1430         spin_unlock_bh(&iface_stat_list_lock);
1431
1432         res = 0;
1433
1434 err:
1435         return res;
1436 }
1437
1438 static int ctrl_cmd_counter_set(const char *input)
1439 {
1440         char cmd;
1441         uid_t uid = 0;
1442         tag_t tag;
1443         int res, argc;
1444         struct tag_counter_set *tcs;
1445         int counter_set;
1446
1447         argc = sscanf(input, "%c %d %u", &cmd, &counter_set, &uid);
1448         CT_DEBUG("qtaguid: ctrl_counterset(%s): argc=%d cmd=%c "
1449                  "set=%d uid=%u\n", input, argc, cmd,
1450                  counter_set, uid);
1451         if (argc != 3) {
1452                 res = -EINVAL;
1453                 goto err;
1454         }
1455         if (counter_set < 0 || counter_set >= IFS_MAX_COUNTER_SETS) {
1456                 pr_info("qtaguid: ctrl_counterset(%s): invalid counter_set range\n",
1457                         input);
1458                 res = -EINVAL;
1459                 goto err;
1460         }
1461         if (!can_manipulate_uids()) {
1462                 pr_info("qtaguid: ctrl_counterset(%s): "
1463                         "insufficient priv from pid=%u uid=%u\n",
1464                         input, current->pid, current_fsuid());
1465                 res = -EPERM;
1466                 goto err;
1467         }
1468
1469         tag = make_tag_from_uid(uid);
1470         spin_lock_bh(&tag_counter_set_list_lock);
1471         tcs = tag_counter_set_tree_search(&tag_counter_set_tree, tag);
1472         if (!tcs) {
1473                 tcs = kzalloc(sizeof(*tcs), GFP_ATOMIC);
1474                 if (!tcs) {
1475                         spin_unlock_bh(&tag_counter_set_list_lock);
1476                         pr_err("qtaguid: ctrl_counterset(%s): "
1477                                "failed to alloc counter set\n",
1478                                input);
1479                         res = -ENOMEM;
1480                         goto err;
1481                 }
1482                 tcs->tn.tag = tag;
1483                 tag_counter_set_tree_insert(tcs, &tag_counter_set_tree);
1484                 CT_DEBUG("qtaguid: ctrl_counterset(%s): added tcs tag=0x%llx "
1485                          "(uid=%u) set=%d\n",
1486                          input, tag, get_uid_from_tag(tag), counter_set);
1487         }
1488         tcs->active_set = counter_set;
1489         spin_unlock_bh(&tag_counter_set_list_lock);
1490
1491         res = 0;
1492
1493 err:
1494         return res;
1495 }
1496
1497 static int ctrl_cmd_tag(const char *input)
1498 {
1499         char cmd;
1500         int sock_fd = 0;
1501         uid_t uid = 0;
1502         tag_t acct_tag = 0;
1503         struct socket *el_socket;
1504         int refcnt = -1;
1505         int res, argc;
1506         struct sock_tag *sock_tag_entry;
1507
1508         /* Unassigned args will get defaulted later. */
1509         argc = sscanf(input, "%c %d %llu %u", &cmd, &sock_fd, &acct_tag, &uid);
1510         CT_DEBUG("qtaguid: ctrl_tag(%s): argc=%d cmd=%c sock_fd=%d "
1511                  "acct_tag=0x%llx uid=%u\n", input, argc, cmd, sock_fd,
1512                  acct_tag, uid);
1513         if (argc < 2) {
1514                 res = -EINVAL;
1515                 goto err;
1516         }
1517         el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
1518         if (!el_socket) {
1519                 pr_info("qtaguid: ctrl_tag(%s): failed to lookup"
1520                         " sock_fd=%d err=%d\n", input, sock_fd, res);
1521                 goto err;
1522         }
1523         refcnt = atomic_read(&el_socket->file->f_count);
1524         CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%d\n",
1525                  input, refcnt);
1526         if (argc < 3) {
1527                 acct_tag = 0;
1528         } else if (!valid_atag(acct_tag)) {
1529                 pr_info("qtaguid: ctrl_tag(%s): invalid tag\n", input);
1530                 res = -EINVAL;
1531                 goto err_put;
1532         }
1533         CT_DEBUG("qtaguid: ctrl_tag(%s): "
1534                  "uid=%u euid=%u fsuid=%u "
1535                  "in_group=%d in_egroup=%d\n",
1536                  input, current_uid(), current_euid(), current_fsuid(),
1537                  in_group_p(proc_stats_readall_gid),
1538                  in_egroup_p(proc_stats_readall_gid));
1539         if (argc < 4) {
1540                 uid = current_fsuid();
1541         } else if (!can_impersonate_uid(uid)) {
1542                 pr_info("qtaguid: ctrl_tag(%s): "
1543                         "insufficient priv from pid=%u uid=%u\n",
1544                         input, current->pid, current_fsuid());
1545                 res = -EPERM;
1546                 goto err_put;
1547         }
1548
1549         spin_lock_bh(&sock_tag_list_lock);
1550         sock_tag_entry = get_sock_stat_nl(el_socket->sk);
1551         if (sock_tag_entry) {
1552                 /*
1553                  * This is a re-tagging, so release the sock_fd that was
1554                  * locked at the time of the 1st tagging.
1555                  */
1556                 sockfd_put(sock_tag_entry->socket);
1557                 refcnt--;
1558                 sock_tag_entry->tag = combine_atag_with_uid(acct_tag,
1559                                                             uid);
1560         } else {
1561                 sock_tag_entry = kzalloc(sizeof(*sock_tag_entry),
1562                                          GFP_ATOMIC);
1563                 if (!sock_tag_entry) {
1564                         pr_err("qtaguid: ctrl_tag(%s): "
1565                                "socket tag alloc failed\n",
1566                                input);
1567                         spin_unlock_bh(&sock_tag_list_lock);
1568                         res = -ENOMEM;
1569                         goto err_put;
1570                 }
1571                 sock_tag_entry->sk = el_socket->sk;
1572                 sock_tag_entry->socket = el_socket;
1573                 sock_tag_entry->tag = combine_atag_with_uid(acct_tag,
1574                                                             uid);
1575                 sock_tag_tree_insert(sock_tag_entry, &sock_tag_tree);
1576         }
1577         spin_unlock_bh(&sock_tag_list_lock);
1578         /* We keep the ref to the socket (file) until it is untagged */
1579         CT_DEBUG("qtaguid: ctrl_tag(%s): done. socket->...->f_count=%d\n",
1580                  input,
1581                  el_socket ? atomic_read(&el_socket->file->f_count) : -1);
1582         return 0;
1583
1584 err_put:
1585         /* Release the sock_fd that was grabbed by sockfd_lookup(). */
1586         sockfd_put(el_socket);
1587         refcnt--;
1588 err:
1589         CT_DEBUG("qtaguid: ctrl_tag(%s): done. socket->...->f_count=%d\n",
1590                  input, refcnt);
1591         return res;
1592 }
1593
1594 static int ctrl_cmd_untag(const char *input)
1595 {
1596         char cmd;
1597         int sock_fd = 0;
1598         struct socket *el_socket;
1599         int refcnt = -1;
1600         int res, argc;
1601         struct sock_tag *sock_tag_entry;
1602
1603         argc = sscanf(input, "%c %d", &cmd, &sock_fd);
1604         CT_DEBUG("qtaguid: ctrl_untag(%s): argc=%d cmd=%c sock_fd=%d\n",
1605                  input, argc, cmd, sock_fd);
1606         if (argc < 2) {
1607                 res = -EINVAL;
1608                 goto err;
1609         }
1610         el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
1611         if (!el_socket) {
1612                 pr_info("qtaguid: ctrl_untag(%s): failed to lookup"
1613                         " sock_fd=%d err=%d\n", input, sock_fd, res);
1614                 goto err;
1615         }
1616         refcnt = atomic_read(&el_socket->file->f_count);
1617         CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%d\n",
1618                  input, refcnt);
1619         spin_lock_bh(&sock_tag_list_lock);
1620         sock_tag_entry = get_sock_stat_nl(el_socket->sk);
1621         if (!sock_tag_entry) {
1622                 spin_unlock_bh(&sock_tag_list_lock);
1623                 res = -EINVAL;
1624                 goto err_put;
1625         }
1626         /*
1627          * The socket already belongs to the current process
1628          * so it can do whatever it wants to it.
1629          */
1630         rb_erase(&sock_tag_entry->sock_node, &sock_tag_tree);
1631
1632         /*
1633          * Release the sock_fd that was grabbed at tag time,
1634          * and once more for the sockfd_lookup() here.
1635          */
1636         sockfd_put(sock_tag_entry->socket);
1637         spin_unlock_bh(&sock_tag_list_lock);
1638         sockfd_put(el_socket);
1639         refcnt -= 2;
1640         kfree(sock_tag_entry);
1641         CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%d\n",
1642                  input, refcnt);
1643
1644         return 0;
1645
1646 err_put:
1647         /* Release the sock_fd that was grabbed by sockfd_lookup(). */
1648         sockfd_put(el_socket);
1649         refcnt--;
1650 err:
1651         CT_DEBUG("qtaguid: ctrl_untag(%s): done. socket->...->f_count=%d\n",
1652                  input, refcnt);
1653         return res;
1654 }
1655
1656 static int qtaguid_ctrl_parse(const char *input, int count)
1657 {
1658         char cmd;
1659         int res;
1660
1661         cmd = input[0];
1662         /* Collect params for commands */
1663         switch (cmd) {
1664         case 'd':
1665                 res = ctrl_cmd_delete(input);
1666                 break;
1667
1668         case 's':
1669                 res = ctrl_cmd_counter_set(input);
1670                 break;
1671
1672         case 't':
1673                 res = ctrl_cmd_tag(input);
1674                 break;
1675
1676         case 'u':
1677                 res = ctrl_cmd_untag(input);
1678                 break;
1679
1680         default:
1681                 res = -EINVAL;
1682                 goto err;
1683         }
1684         if (!res)
1685                 res = count;
1686 err:
1687         CT_DEBUG("qtaguid: ctrl(%s): res=%d\n", input, res);
1688         return res;
1689 }
1690
1691 #define MAX_QTAGUID_CTRL_INPUT_LEN 255
1692 static int qtaguid_ctrl_proc_write(struct file *file, const char __user *buffer,
1693                         unsigned long count, void *data)
1694 {
1695         char input_buf[MAX_QTAGUID_CTRL_INPUT_LEN];
1696
1697         if (unlikely(module_passive))
1698                 return count;
1699
1700         if (count >= MAX_QTAGUID_CTRL_INPUT_LEN)
1701                 return -EINVAL;
1702
1703         if (copy_from_user(input_buf, buffer, count))
1704                 return -EFAULT;
1705
1706         input_buf[count] = '\0';
1707         return qtaguid_ctrl_parse(input_buf, count);
1708 }
1709
/*
 * Cursor shared by the stats printing helpers while one procfs page is
 * being filled.
 */
struct proc_print_info {
        /* Where the next line is written; advanced after each full line. */
        char *outp;
        /* Caller-owned value that is incremented once per printed line. */
        char **num_items_returned;
        /* Interface whose tag stats are currently being printed. */
        struct iface_stat *iface_entry;
        /* Tag stat entry currently being printed. */
        struct tag_stat *ts_entry;
        /* 0 selects the header line; non-zero selects a data line. */
        int item_index;
        /* Bytes still available at outp. */
        int char_count;
};
1718
1719 static int pp_stats_line(struct proc_print_info *ppi, int cnt_set)
1720 {
1721         int len;
1722         struct data_counters *cnts;
1723         if (!ppi->item_index) {
1724                 len = snprintf(ppi->outp, ppi->char_count,
1725                                "idx iface acct_tag_hex uid_tag_int cnt_set "
1726                                "rx_bytes rx_packets "
1727                                "tx_bytes tx_packets "
1728                                "rx_tcp_packets rx_tcp_bytes "
1729                                "rx_udp_packets rx_udp_bytes "
1730                                "rx_other_packets rx_other_bytes "
1731                                "tx_tcp_packets tx_tcp_bytes "
1732                                "tx_udp_packets tx_udp_bytes "
1733                                "tx_other_packets tx_other_bytes\n");
1734         } else {
1735                 tag_t tag = ppi->ts_entry->tn.tag;
1736                 uid_t stat_uid = get_uid_from_tag(tag);
1737                 if (!can_read_other_uid_stats(stat_uid)) {
1738                         CT_DEBUG("qtaguid: stats line: "
1739                                  "%s 0x%llx %u: "
1740                                  "insufficient priv from pid=%u uid=%u\n",
1741                                  ppi->iface_entry->ifname,
1742                                  get_atag_from_tag(tag), stat_uid,
1743                                  current->pid, current_fsuid());
1744                         return 0;
1745                 }
1746                 cnts = &ppi->ts_entry->counters;
1747                 len = snprintf(
1748                         ppi->outp, ppi->char_count,
1749                         "%d %s 0x%llx %u %u "
1750                         "%llu %llu "
1751                         "%llu %llu "
1752                         "%llu %llu "
1753                         "%llu %llu "
1754                         "%llu %llu "
1755                         "%llu %llu "
1756                         "%llu %llu "
1757                         "%llu %llu\n",
1758                         ppi->item_index,
1759                         ppi->iface_entry->ifname,
1760                         get_atag_from_tag(tag),
1761                         stat_uid,
1762                         cnt_set,
1763                         dc_sum_bytes(cnts, cnt_set, IFS_RX),
1764                         dc_sum_packets(cnts, cnt_set, IFS_RX),
1765                         dc_sum_bytes(cnts, cnt_set, IFS_TX),
1766                         dc_sum_packets(cnts, cnt_set, IFS_TX),
1767                         cnts->bpc[cnt_set][IFS_RX][IFS_TCP].bytes,
1768                         cnts->bpc[cnt_set][IFS_RX][IFS_TCP].packets,
1769                         cnts->bpc[cnt_set][IFS_RX][IFS_UDP].bytes,
1770                         cnts->bpc[cnt_set][IFS_RX][IFS_UDP].packets,
1771                         cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].bytes,
1772                         cnts->bpc[cnt_set][IFS_RX][IFS_PROTO_OTHER].packets,
1773                         cnts->bpc[cnt_set][IFS_TX][IFS_TCP].bytes,
1774                         cnts->bpc[cnt_set][IFS_TX][IFS_TCP].packets,
1775                         cnts->bpc[cnt_set][IFS_TX][IFS_UDP].bytes,
1776                         cnts->bpc[cnt_set][IFS_TX][IFS_UDP].packets,
1777                         cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].bytes,
1778                         cnts->bpc[cnt_set][IFS_TX][IFS_PROTO_OTHER].packets);
1779         }
1780         return len;
1781 }
1782
1783 bool pp_sets(struct proc_print_info *ppi)
1784 {
1785         int len;
1786         int counter_set;
1787         for (counter_set = 0; counter_set < IFS_MAX_COUNTER_SETS;
1788              counter_set++) {
1789                 len = pp_stats_line(ppi, counter_set);
1790                 if (len >= ppi->char_count) {
1791                         *ppi->outp = '\0';
1792                         return false;
1793                 }
1794                 if (len) {
1795                         ppi->outp += len;
1796                         ppi->char_count -= len;
1797                         (*ppi->num_items_returned)++;
1798                 }
1799         }
1800         return true;
1801 }
1802
1803 /*
1804  * Procfs reader to get all tag stats using style "1)" as described in
1805  * fs/proc/generic.c
1806  * Groups all protocols tx/rx bytes.
1807  */
1808 static int qtaguid_stats_proc_read(char *page, char **num_items_returned,
1809                                 off_t items_to_skip, int char_count, int *eof,
1810                                 void *data)
1811 {
1812         struct proc_print_info ppi;
1813         int len;
1814
1815         ppi.outp = page;
1816         ppi.item_index = 0;
1817         ppi.char_count = char_count;
1818         ppi.num_items_returned = num_items_returned;
1819
1820         if (unlikely(module_passive)) {
1821                 len = pp_stats_line(&ppi, 0);
1822                 /* The header should always be shorter than the buffer. */
1823                 WARN_ON(len >= ppi.char_count);
1824                 *eof = 1;
1825                 return len;
1826         }
1827
1828         CT_DEBUG("qtaguid:proc stats page=%p *num_items_returned=%p off=%ld "
1829                 "char_count=%d *eof=%d\n", page, *num_items_returned,
1830                 items_to_skip, char_count, *eof);
1831
1832         if (*eof)
1833                 return 0;
1834
1835         if (!items_to_skip) {
1836                 /* The idx is there to help debug when things go belly up. */
1837                 len = pp_stats_line(&ppi, 0);
1838                 /* Don't advance the outp unless the whole line was printed */
1839                 if (len >= ppi.char_count) {
1840                         *ppi.outp = '\0';
1841                         return ppi.outp - page;
1842                 }
1843                 ppi.outp += len;
1844                 ppi.char_count -= len;
1845         }
1846
1847         spin_lock_bh(&iface_stat_list_lock);
1848         list_for_each_entry(ppi.iface_entry, &iface_stat_list, list) {
1849                 struct rb_node *node;
1850                 spin_lock_bh(&ppi.iface_entry->tag_stat_list_lock);
1851                 for (node = rb_first(&ppi.iface_entry->tag_stat_tree);
1852                      node;
1853                      node = rb_next(node)) {
1854                         ppi.ts_entry = rb_entry(node, struct tag_stat, tn.node);
1855                         if (ppi.item_index++ < items_to_skip)
1856                                 continue;
1857                         if (!pp_sets(&ppi)) {
1858                                 spin_unlock_bh(
1859                                         &ppi.iface_entry->tag_stat_list_lock);
1860                                 spin_unlock_bh(&iface_stat_list_lock);
1861                                 return ppi.outp - page;
1862                         }
1863                 }
1864                 spin_unlock_bh(&ppi.iface_entry->tag_stat_list_lock);
1865         }
1866         spin_unlock_bh(&iface_stat_list_lock);
1867
1868         *eof = 1;
1869         return ppi.outp - page;
1870 }
1871
1872 /*------------------------------------------*/
1873 static int __init qtaguid_proc_register(struct proc_dir_entry **res_procdir)
1874 {
1875         int ret;
1876         *res_procdir = proc_mkdir(module_procdirname, init_net.proc_net);
1877         if (!*res_procdir) {
1878                 pr_err("qtaguid: failed to create proc/.../xt_qtaguid\n");
1879                 ret = -ENOMEM;
1880                 goto no_dir;
1881         }
1882
1883         xt_qtaguid_ctrl_file = create_proc_entry("ctrl", proc_ctrl_perms,
1884                                                 *res_procdir);
1885         if (!xt_qtaguid_ctrl_file) {
1886                 pr_err("qtaguid: failed to create xt_qtaguid/ctrl "
1887                         " file\n");
1888                 ret = -ENOMEM;
1889                 goto no_ctrl_entry;
1890         }
1891         xt_qtaguid_ctrl_file->read_proc = qtaguid_ctrl_proc_read;
1892         xt_qtaguid_ctrl_file->write_proc = qtaguid_ctrl_proc_write;
1893
1894         xt_qtaguid_stats_file = create_proc_entry("stats", proc_stats_perms,
1895                                                 *res_procdir);
1896         if (!xt_qtaguid_stats_file) {
1897                 pr_err("qtaguid: failed to create xt_qtaguid/stats "
1898                         "file\n");
1899                 ret = -ENOMEM;
1900                 goto no_stats_entry;
1901         }
1902         xt_qtaguid_stats_file->read_proc = qtaguid_stats_proc_read;
1903         /*
1904          * TODO: add support counter hacking
1905          * xt_qtaguid_stats_file->write_proc = qtaguid_stats_proc_write;
1906          */
1907         return 0;
1908
1909 no_stats_entry:
1910         remove_proc_entry("ctrl", *res_procdir);
1911 no_ctrl_entry:
1912         remove_proc_entry("xt_qtaguid", NULL);
1913 no_dir:
1914         return ret;
1915 }
1916
1917 static struct xt_match qtaguid_mt_reg __read_mostly = {
1918         /*
1919          * This module masquerades as the "owner" module so that iptables
1920          * tools can deal with it.
1921          */
1922         .name       = "owner",
1923         .revision   = 1,
1924         .family     = NFPROTO_UNSPEC,
1925         .match      = qtaguid_mt,
1926         .matchsize  = sizeof(struct xt_qtaguid_match_info),
1927         .me         = THIS_MODULE,
1928 };
1929
1930 static int __init qtaguid_mt_init(void)
1931 {
1932         if (qtaguid_proc_register(&xt_qtaguid_procdir)
1933             || iface_stat_init(xt_qtaguid_procdir)
1934             || xt_register_match(&qtaguid_mt_reg))
1935                 return -1;
1936         return 0;
1937 }
1938
1939 /*
1940  * TODO: allow unloading of the module.
1941  * For now stats are permanent.
1942  * Kconfig forces'y/n' and never an 'm'.
1943  */
1944
1945 module_init(qtaguid_mt_init);
1946 MODULE_AUTHOR("jpa <jpa@google.com>");
1947 MODULE_DESCRIPTION("Xtables: socket owner+tag matching and associated stats");
1948 MODULE_LICENSE("GPL");
1949 MODULE_ALIAS("ipt_owner");
1950 MODULE_ALIAS("ip6t_owner");
1951 MODULE_ALIAS("ipt_qtaguid");
1952 MODULE_ALIAS("ip6t_qtaguid");