netfilter: xt_qtaguid: start tracking iface rx/tx at low level
JP Abgrall [Fri, 27 Apr 2012 19:57:39 +0000 (12:57 -0700)]
qtaguid tracks the device stats by monitoring when it goes up and down,
then it gets the dev_stats().
But devs don't correctly report stats (either they don't count headers
symmetrically between rx/tx, or they count internal control messages).

Now qtaguid counts the rx/tx bytes/packets during raw:prerouting and
mangle:postrouting (nat is not available in ipv6).

The results are in
  /proc/net/xt_qtaguid/iface_stat_fmt
which outputs a format line (bash expansion):
  ifname  total_skb_{rx,tx}_{bytes,packets}

Added event counters for pre/post handling.
Added extra ctrl_*() pid/uid debugging.

Change-Id: Id84345d544ad1dd5f63e3842cab229e71d339297
Signed-off-by: JP Abgrall <jpa@google.com>

net/netfilter/xt_qtaguid.c
net/netfilter/xt_qtaguid_internal.h
net/netfilter/xt_qtaguid_print.c

index 062f582..f6d4cfc 100644 (file)
@@ -114,8 +114,15 @@ module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR);
 /*---------------------------------------------------------------------------*/
 static const char *iface_stat_procdirname = "iface_stat";
 static struct proc_dir_entry *iface_stat_procdir;
+/*
+ * The iface_stat_all* will go away once userspace gets used to the new fields
+ * that have a format line.
+ */
 static const char *iface_stat_all_procfilename = "iface_stat_all";
 static struct proc_dir_entry *iface_stat_all_procfile;
+static const char *iface_stat_fmt_procfilename = "iface_stat_fmt";
+static struct proc_dir_entry *iface_stat_fmt_procfile;
+
 
 /*
  * Ordering of locks:
@@ -128,9 +135,9 @@ static struct proc_dir_entry *iface_stat_all_procfile;
  * Notice how sock_tag_list_lock is held sometimes when uid_tag_data_tree_lock
  * is acquired.
  *
- * Call tree with all lock holders as of 2011-09-25:
+ * Call tree with all lock holders as of 2012-04-27:
  *
- * iface_stat_all_proc_read()
+ * iface_stat_fmt_proc_read()
  *   iface_stat_list_lock
  *     (struct iface_stat)
  *
@@ -781,13 +788,14 @@ done:
        return iface_entry;
 }
 
-static int iface_stat_all_proc_read(char *page, char **num_items_returned,
+static int iface_stat_fmt_proc_read(char *page, char **num_items_returned,
                                    off_t items_to_skip, int char_count,
                                    int *eof, void *data)
 {
        char *outp = page;
        int item_index = 0;
        int len;
+       int fmt = (int)data; /* The data is just 1 (old) or 2 (uses fmt) */
        struct iface_stat *iface_entry;
        struct rtnl_link_stats64 dev_stats, *stats;
        struct rtnl_link_stats64 no_dev_stats = {0};
@@ -797,14 +805,32 @@ static int iface_stat_all_proc_read(char *page, char **num_items_returned,
                return 0;
        }
 
-       CT_DEBUG("qtaguid:proc iface_stat_all "
+       CT_DEBUG("qtaguid:proc iface_stat_fmt "
+                "pid=%u tgid=%u uid=%u "
                 "page=%p *num_items_returned=%p off=%ld "
-                "char_count=%d *eof=%d\n", page, *num_items_returned,
+                "char_count=%d *eof=%d\n",
+                current->pid, current->tgid, current_fsuid(),
+                page, *num_items_returned,
                 items_to_skip, char_count, *eof);
 
        if (*eof)
                return 0;
 
+       if (fmt == 2 && item_index++ >= items_to_skip) {
+               len = snprintf(outp, char_count,
+                              "ifname "
+                              "total_skb_rx_bytes total_skb_rx_packets "
+                              "total_skb_tx_bytes total_skb_tx_packets\n"
+                       );
+               if (len >= char_count) {
+                       *outp = '\0';
+                       return outp - page;
+               }
+               outp += len;
+               char_count -= len;
+               (*num_items_returned)++;
+       }
+
        /*
         * This lock will prevent iface_stat_update() from changing active,
         * and in turn prevent an interface from unregistering itself.
@@ -820,18 +846,37 @@ static int iface_stat_all_proc_read(char *page, char **num_items_returned,
                } else {
                        stats = &no_dev_stats;
                }
-               len = snprintf(outp, char_count,
-                              "%s %d "
-                              "%llu %llu %llu %llu "
-                              "%llu %llu %llu %llu\n",
-                              iface_entry->ifname,
-                              iface_entry->active,
-                              iface_entry->totals[IFS_RX].bytes,
-                              iface_entry->totals[IFS_RX].packets,
-                              iface_entry->totals[IFS_TX].bytes,
-                              iface_entry->totals[IFS_TX].packets,
-                              stats->rx_bytes, stats->rx_packets,
-                              stats->tx_bytes, stats->tx_packets);
+               /*
+                * If the meaning of the data changes, then update the fmtX
+                * string.
+                */
+               if (fmt == 1) {
+                       len = snprintf(
+                               outp, char_count,
+                               "%s %d "
+                               "%llu %llu %llu %llu "
+                               "%llu %llu %llu %llu\n",
+                               iface_entry->ifname,
+                               iface_entry->active,
+                               iface_entry->totals_via_dev[IFS_RX].bytes,
+                               iface_entry->totals_via_dev[IFS_RX].packets,
+                               iface_entry->totals_via_dev[IFS_TX].bytes,
+                               iface_entry->totals_via_dev[IFS_TX].packets,
+                               stats->rx_bytes, stats->rx_packets,
+                               stats->tx_bytes, stats->tx_packets
+                               );
+               } else {
+                       len = snprintf(
+                               outp, char_count,
+                               "%s "
+                               "%llu %llu %llu %llu\n",
+                               iface_entry->ifname,
+                               iface_entry->totals_via_skb[IFS_RX].bytes,
+                               iface_entry->totals_via_skb[IFS_RX].packets,
+                               iface_entry->totals_via_skb[IFS_TX].bytes,
+                               iface_entry->totals_via_skb[IFS_TX].packets
+                               );
+               }
                if (len >= char_count) {
                        spin_unlock_bh(&iface_stat_list_lock);
                        *outp = '\0';
@@ -865,13 +910,17 @@ static void iface_create_proc_worker(struct work_struct *work)
        new_iface->proc_ptr = proc_entry;
 
        create_proc_read_entry("tx_bytes", proc_iface_perms, proc_entry,
-                       read_proc_u64, &new_iface->totals[IFS_TX].bytes);
+                              read_proc_u64,
+                              &new_iface->totals_via_dev[IFS_TX].bytes);
        create_proc_read_entry("rx_bytes", proc_iface_perms, proc_entry,
-                       read_proc_u64, &new_iface->totals[IFS_RX].bytes);
+                              read_proc_u64,
+                              &new_iface->totals_via_dev[IFS_RX].bytes);
        create_proc_read_entry("tx_packets", proc_iface_perms, proc_entry,
-                       read_proc_u64, &new_iface->totals[IFS_TX].packets);
+                              read_proc_u64,
+                              &new_iface->totals_via_dev[IFS_TX].packets);
        create_proc_read_entry("rx_packets", proc_iface_perms, proc_entry,
-                       read_proc_u64, &new_iface->totals[IFS_RX].packets);
+                              read_proc_u64,
+                              &new_iface->totals_via_dev[IFS_RX].packets);
        create_proc_read_entry("active", proc_iface_perms, proc_entry,
                        read_proc_bool, &new_iface->active);
 
@@ -975,11 +1024,13 @@ static void iface_check_stats_reset_and_adjust(struct net_device *net_dev,
                             "iface reset its stats unexpectedly\n", __func__,
                             net_dev->name);
 
-               iface->totals[IFS_TX].bytes += iface->last_known[IFS_TX].bytes;
-               iface->totals[IFS_TX].packets +=
+               iface->totals_via_dev[IFS_TX].bytes +=
+                       iface->last_known[IFS_TX].bytes;
+               iface->totals_via_dev[IFS_TX].packets +=
                        iface->last_known[IFS_TX].packets;
-               iface->totals[IFS_RX].bytes += iface->last_known[IFS_RX].bytes;
-               iface->totals[IFS_RX].packets +=
+               iface->totals_via_dev[IFS_RX].bytes +=
+                       iface->last_known[IFS_RX].bytes;
+               iface->totals_via_dev[IFS_RX].packets +=
                        iface->last_known[IFS_RX].packets;
                iface->last_known_valid = false;
                IF_DEBUG("qtaguid: %s(%s): iface=%p "
@@ -1147,6 +1198,27 @@ static struct sock_tag *get_sock_stat(const struct sock *sk)
        return sock_tag_entry;
 }
 
+static int ipx_proto(const struct sk_buff *skb,
+                    struct xt_action_param *par)
+{
+       int thoff, tproto;
+
+       switch (par->family) {
+       case NFPROTO_IPV6:
+               tproto = ipv6_find_hdr(skb, &thoff, -1, NULL);
+               if (tproto < 0)
+                       MT_DEBUG("%s(): transport header not found in ipv6"
+                                " skb=%p\n", __func__, skb);
+               break;
+       case NFPROTO_IPV4:
+               tproto = ip_hdr(skb)->protocol;
+               break;
+       default:
+               tproto = IPPROTO_RAW;
+       }
+       return tproto;
+}
+
 static void
 data_counters_update(struct data_counters *dc, int set,
                     enum ifs_tx_rx direction, int proto, int bytes)
@@ -1207,10 +1279,10 @@ static void iface_stat_update(struct net_device *net_dev, bool stash_only)
                spin_unlock_bh(&iface_stat_list_lock);
                return;
        }
-       entry->totals[IFS_TX].bytes += stats->tx_bytes;
-       entry->totals[IFS_TX].packets += stats->tx_packets;
-       entry->totals[IFS_RX].bytes += stats->rx_bytes;
-       entry->totals[IFS_RX].packets += stats->rx_packets;
+       entry->totals_via_dev[IFS_TX].bytes += stats->tx_bytes;
+       entry->totals_via_dev[IFS_TX].packets += stats->tx_packets;
+       entry->totals_via_dev[IFS_RX].bytes += stats->rx_bytes;
+       entry->totals_via_dev[IFS_RX].packets += stats->rx_packets;
        /* We don't need the last_known[] anymore */
        entry->last_known_valid = false;
        _iface_stat_set_active(entry, net_dev, false);
@@ -1220,6 +1292,67 @@ static void iface_stat_update(struct net_device *net_dev, bool stash_only)
        spin_unlock_bh(&iface_stat_list_lock);
 }
 
+/*
+ * Update stats for the specified interface from the skb.
+ * Do nothing if the entry
+ * does not exist (when a device was never configured with an IP address).
+ * Called on each skb.
+ */
+static void iface_stat_update_from_skb(const struct sk_buff *skb,
+                                      struct xt_action_param *par)
+{
+       struct iface_stat *entry;
+       const struct net_device *el_dev;
+       enum ifs_tx_rx direction = par->in ? IFS_RX : IFS_TX;
+       int bytes = skb->len;
+
+       if (!skb->dev) {
+               MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
+               el_dev = par->in ? : par->out;
+       } else {
+               const struct net_device *other_dev;
+               el_dev = skb->dev;
+               other_dev = par->in ? : par->out;
+               if (el_dev != other_dev) {
+                       MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
+                                "par->(in/out)=%p %s\n",
+                                par->hooknum, el_dev, el_dev->name, other_dev,
+                                other_dev->name);
+               }
+       }
+
+       if (unlikely(!el_dev)) {
+               pr_err("qtaguid[%d]: %s(): no par->in/out?!!\n",
+                      par->hooknum, __func__);
+               BUG();
+       } else if (unlikely(!el_dev->name)) {
+               pr_err("qtaguid[%d]: %s(): no dev->name?!!\n",
+                      par->hooknum, __func__);
+               BUG();
+       } else {
+               int proto = ipx_proto(skb, par);
+               MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
+                        par->hooknum, el_dev->name, el_dev->type,
+                        par->family, proto);
+       }
+
+       spin_lock_bh(&iface_stat_list_lock);
+       entry = get_iface_entry(el_dev->name);
+       if (entry == NULL) {
+               IF_DEBUG("qtaguid: iface_stat: %s(%s): not tracked\n",
+                        __func__, el_dev->name);
+               spin_unlock_bh(&iface_stat_list_lock);
+               return;
+       }
+
+       IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
+                el_dev->name, entry);
+
+       entry->totals_via_skb[direction].bytes += bytes;
+       entry->totals_via_skb[direction].packets++;
+       spin_unlock_bh(&iface_stat_list_lock);
+}
+
 static void tag_stat_update(struct tag_stat *tag_entry,
                        enum ifs_tx_rx direction, int proto, int bytes)
 {
@@ -1467,18 +1600,31 @@ static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
                                                    parent_procdir);
        if (!iface_stat_all_procfile) {
                pr_err("qtaguid: iface_stat: init "
-                      " failed to create stat_all proc entry\n");
+                      " failed to create stat_old proc entry\n");
                err = -1;
                goto err_zap_entry;
        }
-       iface_stat_all_procfile->read_proc = iface_stat_all_proc_read;
+       iface_stat_all_procfile->read_proc = iface_stat_fmt_proc_read;
+       iface_stat_all_procfile->data = (void *)1; /* fmt1 */
+
+       iface_stat_fmt_procfile = create_proc_entry(iface_stat_fmt_procfilename,
+                                                   proc_iface_perms,
+                                                   parent_procdir);
+       if (!iface_stat_fmt_procfile) {
+               pr_err("qtaguid: iface_stat: init "
+                      " failed to create stat_all proc entry\n");
+               err = -1;
+               goto err_zap_all_stats_entry;
+       }
+       iface_stat_fmt_procfile->read_proc = iface_stat_fmt_proc_read;
+       iface_stat_fmt_procfile->data = (void *)2; /* fmt2 */
 
 
        err = register_netdevice_notifier(&iface_netdev_notifier_blk);
        if (err) {
                pr_err("qtaguid: iface_stat: init "
                       "failed to register dev event handler\n");
-               goto err_zap_all_stats_entry;
+               goto err_zap_all_stats_entries;
        }
        err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);
        if (err) {
@@ -1499,6 +1645,8 @@ err_unreg_ip4_addr:
        unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk);
 err_unreg_nd:
        unregister_netdevice_notifier(&iface_netdev_notifier_blk);
+err_zap_all_stats_entries:
+       remove_proc_entry(iface_stat_fmt_procfilename, parent_procdir);
 err_zap_all_stats_entry:
        remove_proc_entry(iface_stat_all_procfilename, parent_procdir);
 err_zap_entry:
@@ -1550,27 +1698,6 @@ static struct sock *qtaguid_find_sk(const struct sk_buff *skb,
        return sk;
 }
 
-static int ipx_proto(const struct sk_buff *skb,
-                    struct xt_action_param *par)
-{
-       int thoff, tproto;
-
-       switch (par->family) {
-       case NFPROTO_IPV6:
-               tproto = ipv6_find_hdr(skb, &thoff, -1, NULL);
-               if (tproto < 0)
-                       MT_DEBUG("%s(): transport header not found in ipv6"
-                                " skb=%p\n", __func__, skb);
-               break;
-       case NFPROTO_IPV4:
-               tproto = ip_hdr(skb)->protocol;
-               break;
-       default:
-               tproto = IPPROTO_RAW;
-       }
-       return tproto;
-}
-
 static void account_for_uid(const struct sk_buff *skb,
                            const struct sock *alternate_sk, uid_t uid,
                            struct xt_action_param *par)
@@ -1630,8 +1757,22 @@ static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)
                goto ret_res;
        }
 
-       sk = skb->sk;
+       switch (par->hooknum) {
+       case NF_INET_PRE_ROUTING:
+       case NF_INET_POST_ROUTING:
+               atomic64_inc(&qtu_events.match_calls_prepost);
+               iface_stat_update_from_skb(skb, par);
+               /*
+                * We are done in pre/post. The skb will get processed
+                * further later.
+                */
+               res = (info->match ^ info->invert);
+               goto ret_res;
+               break;
+       /* default: Fall through and do UID related work */
+       }
 
+       sk = skb->sk;
        if (sk == NULL) {
                /*
                 * A missing sk->sk_socket happens when packets are in-flight
@@ -1806,8 +1947,10 @@ static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned,
        if (*eof)
                return 0;
 
-       CT_DEBUG("qtaguid: proc ctrl page=%p off=%ld char_count=%d *eof=%d\n",
-               page, items_to_skip, char_count, *eof);
+       CT_DEBUG("qtaguid: proc ctrl pid=%u tgid=%u uid=%u "
+                "page=%p off=%ld char_count=%d *eof=%d\n",
+                current->pid, current->tgid, current_fsuid(),
+                page, items_to_skip, char_count, *eof);
 
        spin_lock_bh(&sock_tag_list_lock);
        for (node = rb_first(&sock_tag_tree);
@@ -1851,6 +1994,7 @@ static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned,
                               "delete_cmds=%llu "
                               "iface_events=%llu "
                               "match_calls=%llu "
+                              "match_calls_prepost=%llu "
                               "match_found_sk=%llu "
                               "match_found_sk_in_ct=%llu "
                               "match_found_no_sk_in_ct=%llu "
@@ -1862,6 +2006,7 @@ static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned,
                               atomic64_read(&qtu_events.delete_cmds),
                               atomic64_read(&qtu_events.iface_events),
                               atomic64_read(&qtu_events.match_calls),
+                              atomic64_read(&qtu_events.match_calls_prepost),
                               atomic64_read(&qtu_events.match_found_sk),
                               atomic64_read(&qtu_events.match_found_sk_in_ct),
                               atomic64_read(
@@ -2135,7 +2280,9 @@ static int ctrl_cmd_tag(const char *input)
        el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
        if (!el_socket) {
                pr_info("qtaguid: ctrl_tag(%s): failed to lookup"
-                       " sock_fd=%d err=%d\n", input, sock_fd, res);
+                       " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
+                       input, sock_fd, res, current->pid, current->tgid,
+                       current_fsuid());
                goto err;
        }
        CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n",
@@ -2280,7 +2427,9 @@ static int ctrl_cmd_untag(const char *input)
        el_socket = sockfd_lookup(sock_fd, &res);  /* This locks the file */
        if (!el_socket) {
                pr_info("qtaguid: ctrl_untag(%s): failed to lookup"
-                       " sock_fd=%d err=%d\n", input, sock_fd, res);
+                       " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
+                       input, sock_fd, res, current->pid, current->tgid,
+                       current_fsuid());
                goto err;
        }
        CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n",
@@ -2356,6 +2505,9 @@ static int qtaguid_ctrl_parse(const char *input, int count)
        char cmd;
        int res;
 
+       CT_DEBUG("qtaguid: ctrl(%s): pid=%u tgid=%u uid=%u\n",
+                input, current->pid, current->tgid, current_fsuid());
+
        cmd = input[0];
        /* Collect params for commands */
        switch (cmd) {
@@ -2532,9 +2684,12 @@ static int qtaguid_stats_proc_read(char *page, char **num_items_returned,
                return len;
        }
 
-       CT_DEBUG("qtaguid:proc stats page=%p *num_items_returned=%p off=%ld "
-               "char_count=%d *eof=%d\n", page, *num_items_returned,
-               items_to_skip, char_count, *eof);
+       CT_DEBUG("qtaguid:proc stats pid=%u tgid=%u uid=%u "
+                "page=%p *num_items_returned=%p off=%ld "
+                "char_count=%d *eof=%d\n",
+                current->pid, current->tgid, current_fsuid(),
+                page, *num_items_returned,
+                items_to_skip, char_count, *eof);
 
        if (*eof)
                return 0;
index 02479d6..d79f838 100644 (file)
@@ -202,7 +202,8 @@ struct iface_stat {
        /* net_dev is only valid for active iface_stat */
        struct net_device *net_dev;
 
-       struct byte_packet_counters totals[IFS_MAX_DIRECTIONS];
+       struct byte_packet_counters totals_via_dev[IFS_MAX_DIRECTIONS];
+       struct byte_packet_counters totals_via_skb[IFS_MAX_DIRECTIONS];
        /*
         * We keep the last_known, because some devices reset their counters
         * just before NETDEV_UP, while some will reset just before
@@ -254,6 +255,8 @@ struct qtaguid_event_counts {
        atomic64_t iface_events;  /* Number of NETDEV_* events handled */
 
        atomic64_t match_calls;   /* Number of times iptables called mt */
+       /* Number of times iptables called mt from pre or post routing hooks */
+       atomic64_t match_calls_prepost;
        /*
         * match_found_sk_*: numbers related to the netfilter matching
         * function finding a sock for the sk_buff.
index 3917678..8cbd8e4 100644 (file)
@@ -183,7 +183,11 @@ char *pp_iface_stat(struct iface_stat *is)
                res = kasprintf(GFP_ATOMIC, "iface_stat@%p{"
                                "list=list_head{...}, "
                                "ifname=%s, "
-                               "total={rx={bytes=%llu, "
+                               "total_dev={rx={bytes=%llu, "
+                               "packets=%llu}, "
+                               "tx={bytes=%llu, "
+                               "packets=%llu}}, "
+                               "total_skb={rx={bytes=%llu, "
                                "packets=%llu}, "
                                "tx={bytes=%llu, "
                                "packets=%llu}}, "
@@ -198,10 +202,14 @@ char *pp_iface_stat(struct iface_stat *is)
                                "tag_stat_tree=rb_root{...}}",
                                is,
                                is->ifname,
-                               is->totals[IFS_RX].bytes,
-                               is->totals[IFS_RX].packets,
-                               is->totals[IFS_TX].bytes,
-                               is->totals[IFS_TX].packets,
+                               is->totals_via_dev[IFS_RX].bytes,
+                               is->totals_via_dev[IFS_RX].packets,
+                               is->totals_via_dev[IFS_TX].bytes,
+                               is->totals_via_dev[IFS_TX].packets,
+                               is->totals_via_skb[IFS_RX].bytes,
+                               is->totals_via_skb[IFS_RX].packets,
+                               is->totals_via_skb[IFS_TX].bytes,
+                               is->totals_via_skb[IFS_TX].packets,
                                is->last_known_valid,
                                is->last_known[IFS_RX].bytes,
                                is->last_known[IFS_RX].packets,