[PATCH 5/7][RFC] netfilter: xt_qtaguid: start tracking iface rx/tx at low level
From: John Stultz
Date: Fri Sep 21 2012 - 22:11:56 EST
From: JP Abgrall <jpa@xxxxxxxxxx>
qtaguid tracks per-device stats by monitoring when an interface goes up
and down, at which point it reads the dev_stats().
But devices don't report stats correctly (either they don't count headers
symmetrically between rx/tx, or they count internal control messages).
Now qtaguid also counts the rx/tx bytes/packets itself during
raw:prerouting and mangle:postrouting (nat is not available in ipv6).
The results are in
/proc/net/xt_qtaguid/iface_stat_fmt
which starts with a format (header) line, written here in bash
brace-expansion shorthand:
ifname total_skb_{rx,tx}_{bytes,packets}
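Expanded, that header line reads
  ifname total_skb_rx_bytes total_skb_rx_packets total_skb_tx_bytes total_skb_tx_packets
followed by one line per tracked interface, e.g. (the values below are
made up purely for illustration):
  wlan0 1184302 1427 96558 1201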
Added event counters for pre/post handling.
Added extra ctrl_*() pid/uid debugging.
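Since each data line in iface_stat_fmt is just whitespace-separated
fields, a consumer can skip the header and pull out the per-interface
totals with plain awk; a minimal sketch (illustrative only, not part of
this patch):
  awk 'NR > 1 { printf "%s: rx=%s bytes tx=%s bytes\n", $1, $2, $4 }' \
      /proc/net/xt_qtaguid/iface_stat_fmt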
Cc: netdev@xxxxxxxxxxxxxxx
Cc: JP Abgrall <jpa@xxxxxxxxxx>
Cc: Ashish Sharma <ashishsharma@xxxxxxxxxx>
Cc: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@xxxxxxxxx>
Signed-off-by: JP Abgrall <jpa@xxxxxxxxxx>
Signed-off-by: John Stultz <john.stultz@xxxxxxxxxx>
---
net/netfilter/xt_qtaguid.c | 277 +++++++++++++++++++++++++++--------
net/netfilter/xt_qtaguid_internal.h | 5 +-
net/netfilter/xt_qtaguid_print.c | 18 ++-
3 files changed, 233 insertions(+), 67 deletions(-)
diff --git a/net/netfilter/xt_qtaguid.c b/net/netfilter/xt_qtaguid.c
index 47dfb9e..f490ef5 100644
--- a/net/netfilter/xt_qtaguid.c
+++ b/net/netfilter/xt_qtaguid.c
@@ -104,8 +104,15 @@ module_param_named(debug_mask, qtaguid_debug_mask, uint, S_IRUGO | S_IWUSR);
/*---------------------------------------------------------------------------*/
static const char *iface_stat_procdirname = "iface_stat";
static struct proc_dir_entry *iface_stat_procdir;
+/*
+ * The iface_stat_all* will go away once userspace gets used to the new fields
+ * that have a format line.
+ */
static const char *iface_stat_all_procfilename = "iface_stat_all";
static struct proc_dir_entry *iface_stat_all_procfile;
+static const char *iface_stat_fmt_procfilename = "iface_stat_fmt";
+static struct proc_dir_entry *iface_stat_fmt_procfile;
+
/*
* Ordering of locks:
@@ -118,9 +125,9 @@ static struct proc_dir_entry *iface_stat_all_procfile;
* Notice how sock_tag_list_lock is held sometimes when uid_tag_data_tree_lock
* is acquired.
*
- * Call tree with all lock holders as of 2011-09-25:
+ * Call tree with all lock holders as of 2012-04-27:
*
- * iface_stat_all_proc_read()
+ * iface_stat_fmt_proc_read()
* iface_stat_list_lock
* (struct iface_stat)
*
@@ -771,13 +778,14 @@ done:
return iface_entry;
}
-static int iface_stat_all_proc_read(char *page, char **num_items_returned,
+static int iface_stat_fmt_proc_read(char *page, char **num_items_returned,
off_t items_to_skip, int char_count,
int *eof, void *data)
{
char *outp = page;
int item_index = 0;
int len;
+ int fmt = (int)data; /* The data is just 1 (old) or 2 (uses fmt) */
struct iface_stat *iface_entry;
struct rtnl_link_stats64 dev_stats, *stats;
struct rtnl_link_stats64 no_dev_stats = {0};
@@ -787,14 +795,32 @@ static int iface_stat_all_proc_read(char *page, char **num_items_returned,
return 0;
}
- CT_DEBUG("qtaguid:proc iface_stat_all "
+ CT_DEBUG("qtaguid:proc iface_stat_fmt "
+ "pid=%u tgid=%u uid=%u "
"page=%p *num_items_returned=%p off=%ld "
- "char_count=%d *eof=%d\n", page, *num_items_returned,
+ "char_count=%d *eof=%d\n",
+ current->pid, current->tgid, current_fsuid(),
+ page, *num_items_returned,
items_to_skip, char_count, *eof);
if (*eof)
return 0;
+ if (fmt == 2 && item_index++ >= items_to_skip) {
+ len = snprintf(outp, char_count,
+ "ifname "
+ "total_skb_rx_bytes total_skb_rx_packets "
+ "total_skb_tx_bytes total_skb_tx_packets\n"
+ );
+ if (len >= char_count) {
+ *outp = '\0';
+ return outp - page;
+ }
+ outp += len;
+ char_count -= len;
+ (*num_items_returned)++;
+ }
+
/*
* This lock will prevent iface_stat_update() from changing active,
* and in turn prevent an interface from unregistering itself.
@@ -810,18 +836,37 @@ static int iface_stat_all_proc_read(char *page, char **num_items_returned,
} else {
stats = &no_dev_stats;
}
- len = snprintf(outp, char_count,
- "%s %d "
- "%llu %llu %llu %llu "
- "%llu %llu %llu %llu\n",
- iface_entry->ifname,
- iface_entry->active,
- iface_entry->totals[IFS_RX].bytes,
- iface_entry->totals[IFS_RX].packets,
- iface_entry->totals[IFS_TX].bytes,
- iface_entry->totals[IFS_TX].packets,
- stats->rx_bytes, stats->rx_packets,
- stats->tx_bytes, stats->tx_packets);
+ /*
+ * If the meaning of the data changes, then update the fmtX
+ * string.
+ */
+ if (fmt == 1) {
+ len = snprintf(
+ outp, char_count,
+ "%s %d "
+ "%llu %llu %llu %llu "
+ "%llu %llu %llu %llu\n",
+ iface_entry->ifname,
+ iface_entry->active,
+ iface_entry->totals_via_dev[IFS_RX].bytes,
+ iface_entry->totals_via_dev[IFS_RX].packets,
+ iface_entry->totals_via_dev[IFS_TX].bytes,
+ iface_entry->totals_via_dev[IFS_TX].packets,
+ stats->rx_bytes, stats->rx_packets,
+ stats->tx_bytes, stats->tx_packets
+ );
+ } else {
+ len = snprintf(
+ outp, char_count,
+ "%s "
+ "%llu %llu %llu %llu\n",
+ iface_entry->ifname,
+ iface_entry->totals_via_skb[IFS_RX].bytes,
+ iface_entry->totals_via_skb[IFS_RX].packets,
+ iface_entry->totals_via_skb[IFS_TX].bytes,
+ iface_entry->totals_via_skb[IFS_TX].packets
+ );
+ }
if (len >= char_count) {
spin_unlock_bh(&iface_stat_list_lock);
*outp = '\0';
@@ -855,13 +900,17 @@ static void iface_create_proc_worker(struct work_struct *work)
new_iface->proc_ptr = proc_entry;
create_proc_read_entry("tx_bytes", proc_iface_perms, proc_entry,
- read_proc_u64, &new_iface->totals[IFS_TX].bytes);
+ read_proc_u64,
+ &new_iface->totals_via_dev[IFS_TX].bytes);
create_proc_read_entry("rx_bytes", proc_iface_perms, proc_entry,
- read_proc_u64, &new_iface->totals[IFS_RX].bytes);
+ read_proc_u64,
+ &new_iface->totals_via_dev[IFS_RX].bytes);
create_proc_read_entry("tx_packets", proc_iface_perms, proc_entry,
- read_proc_u64, &new_iface->totals[IFS_TX].packets);
+ read_proc_u64,
+ &new_iface->totals_via_dev[IFS_TX].packets);
create_proc_read_entry("rx_packets", proc_iface_perms, proc_entry,
- read_proc_u64, &new_iface->totals[IFS_RX].packets);
+ read_proc_u64,
+ &new_iface->totals_via_dev[IFS_RX].packets);
create_proc_read_entry("active", proc_iface_perms, proc_entry,
read_proc_bool, &new_iface->active);
@@ -965,11 +1014,13 @@ static void iface_check_stats_reset_and_adjust(struct net_device *net_dev,
"iface reset its stats unexpectedly\n", __func__,
net_dev->name);
- iface->totals[IFS_TX].bytes += iface->last_known[IFS_TX].bytes;
- iface->totals[IFS_TX].packets +=
+ iface->totals_via_dev[IFS_TX].bytes +=
+ iface->last_known[IFS_TX].bytes;
+ iface->totals_via_dev[IFS_TX].packets +=
iface->last_known[IFS_TX].packets;
- iface->totals[IFS_RX].bytes += iface->last_known[IFS_RX].bytes;
- iface->totals[IFS_RX].packets +=
+ iface->totals_via_dev[IFS_RX].bytes +=
+ iface->last_known[IFS_RX].bytes;
+ iface->totals_via_dev[IFS_RX].packets +=
iface->last_known[IFS_RX].packets;
iface->last_known_valid = false;
IF_DEBUG("qtaguid: %s(%s): iface=%p "
@@ -1137,6 +1188,27 @@ static struct sock_tag *get_sock_stat(const struct sock *sk)
return sock_tag_entry;
}
+static int ipx_proto(const struct sk_buff *skb,
+ struct xt_action_param *par)
+{
+ int thoff, tproto;
+
+ switch (par->family) {
+ case NFPROTO_IPV6:
+ tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
+ if (tproto < 0)
+ MT_DEBUG("%s(): transport header not found in ipv6"
+ " skb=%p\n", __func__, skb);
+ break;
+ case NFPROTO_IPV4:
+ tproto = ip_hdr(skb)->protocol;
+ break;
+ default:
+ tproto = IPPROTO_RAW;
+ }
+ return tproto;
+}
+
static void
data_counters_update(struct data_counters *dc, int set,
enum ifs_tx_rx direction, int proto, int bytes)
@@ -1197,10 +1269,10 @@ static void iface_stat_update(struct net_device *net_dev, bool stash_only)
spin_unlock_bh(&iface_stat_list_lock);
return;
}
- entry->totals[IFS_TX].bytes += stats->tx_bytes;
- entry->totals[IFS_TX].packets += stats->tx_packets;
- entry->totals[IFS_RX].bytes += stats->rx_bytes;
- entry->totals[IFS_RX].packets += stats->rx_packets;
+ entry->totals_via_dev[IFS_TX].bytes += stats->tx_bytes;
+ entry->totals_via_dev[IFS_TX].packets += stats->tx_packets;
+ entry->totals_via_dev[IFS_RX].bytes += stats->rx_bytes;
+ entry->totals_via_dev[IFS_RX].packets += stats->rx_packets;
/* We don't need the last_known[] anymore */
entry->last_known_valid = false;
_iface_stat_set_active(entry, net_dev, false);
@@ -1210,6 +1282,67 @@ static void iface_stat_update(struct net_device *net_dev, bool stash_only)
spin_unlock_bh(&iface_stat_list_lock);
}
+/*
+ * Update stats for the specified interface from the skb.
+ * Do nothing if the entry does not exist
+ * (when a device was never configured with an IP address).
+ * Called on each sk.
+ */
+static void iface_stat_update_from_skb(const struct sk_buff *skb,
+ struct xt_action_param *par)
+{
+ struct iface_stat *entry;
+ const struct net_device *el_dev;
+ enum ifs_tx_rx direction = par->in ? IFS_RX : IFS_TX;
+ int bytes = skb->len;
+
+ if (!skb->dev) {
+ MT_DEBUG("qtaguid[%d]: no skb->dev\n", par->hooknum);
+ el_dev = par->in ? : par->out;
+ } else {
+ const struct net_device *other_dev;
+ el_dev = skb->dev;
+ other_dev = par->in ? : par->out;
+ if (el_dev != other_dev) {
+ MT_DEBUG("qtaguid[%d]: skb->dev=%p %s vs "
+ "par->(in/out)=%p %s\n",
+ par->hooknum, el_dev, el_dev->name, other_dev,
+ other_dev->name);
+ }
+ }
+
+ if (unlikely(!el_dev)) {
+ pr_err("qtaguid[%d]: %s(): no par->in/out?!!\n",
+ par->hooknum, __func__);
+ BUG();
+ } else if (unlikely(!el_dev->name)) {
+ pr_err("qtaguid[%d]: %s(): no dev->name?!!\n",
+ par->hooknum, __func__);
+ BUG();
+ } else {
+ int proto = ipx_proto(skb, par);
+ MT_DEBUG("qtaguid[%d]: dev name=%s type=%d fam=%d proto=%d\n",
+ par->hooknum, el_dev->name, el_dev->type,
+ par->family, proto);
+ }
+
+ spin_lock_bh(&iface_stat_list_lock);
+ entry = get_iface_entry(el_dev->name);
+ if (entry == NULL) {
+ IF_DEBUG("qtaguid: iface_stat: %s(%s): not tracked\n",
+ __func__, el_dev->name);
+ spin_unlock_bh(&iface_stat_list_lock);
+ return;
+ }
+
+ IF_DEBUG("qtaguid: %s(%s): entry=%p\n", __func__,
+ el_dev->name, entry);
+
+ entry->totals_via_skb[direction].bytes += bytes;
+ entry->totals_via_skb[direction].packets++;
+ spin_unlock_bh(&iface_stat_list_lock);
+}
+
static void tag_stat_update(struct tag_stat *tag_entry,
enum ifs_tx_rx direction, int proto, int bytes)
{
@@ -1457,18 +1590,31 @@ static int __init iface_stat_init(struct proc_dir_entry *parent_procdir)
parent_procdir);
if (!iface_stat_all_procfile) {
pr_err("qtaguid: iface_stat: init "
- " failed to create stat_all proc entry\n");
+ " failed to create stat_old proc entry\n");
err = -1;
goto err_zap_entry;
}
- iface_stat_all_procfile->read_proc = iface_stat_all_proc_read;
+ iface_stat_all_procfile->read_proc = iface_stat_fmt_proc_read;
+ iface_stat_all_procfile->data = (void *)1; /* fmt1 */
+
+ iface_stat_fmt_procfile = create_proc_entry(iface_stat_fmt_procfilename,
+ proc_iface_perms,
+ parent_procdir);
+ if (!iface_stat_fmt_procfile) {
+ pr_err("qtaguid: iface_stat: init "
+ " failed to create stat_all proc entry\n");
+ err = -1;
+ goto err_zap_all_stats_entry;
+ }
+ iface_stat_fmt_procfile->read_proc = iface_stat_fmt_proc_read;
+ iface_stat_fmt_procfile->data = (void *)2; /* fmt2 */
err = register_netdevice_notifier(&iface_netdev_notifier_blk);
if (err) {
pr_err("qtaguid: iface_stat: init "
"failed to register dev event handler\n");
- goto err_zap_all_stats_entry;
+ goto err_zap_all_stats_entries;
}
err = register_inetaddr_notifier(&iface_inetaddr_notifier_blk);
if (err) {
@@ -1489,6 +1635,8 @@ err_unreg_ip4_addr:
unregister_inetaddr_notifier(&iface_inetaddr_notifier_blk);
err_unreg_nd:
unregister_netdevice_notifier(&iface_netdev_notifier_blk);
+err_zap_all_stats_entries:
+ remove_proc_entry(iface_stat_fmt_procfilename, parent_procdir);
err_zap_all_stats_entry:
remove_proc_entry(iface_stat_all_procfilename, parent_procdir);
err_zap_entry:
@@ -1540,27 +1688,6 @@ static struct sock *qtaguid_find_sk(const struct sk_buff *skb,
return sk;
}
-static int ipx_proto(const struct sk_buff *skb,
- struct xt_action_param *par)
-{
- int thoff, tproto;
-
- switch (par->family) {
- case NFPROTO_IPV6:
- tproto = ipv6_find_hdr(skb, &thoff, -1, NULL, NULL);
- if (tproto < 0)
- MT_DEBUG("%s(): transport header not found in ipv6"
- " skb=%p\n", __func__, skb);
- break;
- case NFPROTO_IPV4:
- tproto = ip_hdr(skb)->protocol;
- break;
- default:
- tproto = IPPROTO_RAW;
- }
- return tproto;
-}
-
static void account_for_uid(const struct sk_buff *skb,
const struct sock *alternate_sk, uid_t uid,
struct xt_action_param *par)
@@ -1620,8 +1747,22 @@ static bool qtaguid_mt(const struct sk_buff *skb, struct xt_action_param *par)
goto ret_res;
}
- sk = skb->sk;
+ switch (par->hooknum) {
+ case NF_INET_PRE_ROUTING:
+ case NF_INET_POST_ROUTING:
+ atomic64_inc(&qtu_events.match_calls_prepost);
+ iface_stat_update_from_skb(skb, par);
+ /*
+ * We are done in pre/post. The skb will get processed
+ * further later.
+ */
+ res = (info->match ^ info->invert);
+ goto ret_res;
+ break;
+ /* default: Fall through and do UID related work */
+ }
+ sk = skb->sk;
if (sk == NULL) {
/*
* A missing sk->sk_socket happens when packets are in-flight
@@ -1796,8 +1937,10 @@ static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned,
if (*eof)
return 0;
- CT_DEBUG("qtaguid: proc ctrl page=%p off=%ld char_count=%d *eof=%d\n",
- page, items_to_skip, char_count, *eof);
+ CT_DEBUG("qtaguid: proc ctrl pid=%u tgid=%u uid=%u "
+ "page=%p off=%ld char_count=%d *eof=%d\n",
+ current->pid, current->tgid, current_fsuid(),
+ page, items_to_skip, char_count, *eof);
spin_lock_bh(&sock_tag_list_lock);
for (node = rb_first(&sock_tag_tree);
@@ -1841,6 +1984,7 @@ static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned,
"delete_cmds=%llu "
"iface_events=%llu "
"match_calls=%llu "
+ "match_calls_prepost=%llu "
"match_found_sk=%llu "
"match_found_sk_in_ct=%llu "
"match_found_no_sk_in_ct=%llu "
@@ -1852,6 +1996,7 @@ static int qtaguid_ctrl_proc_read(char *page, char **num_items_returned,
atomic64_read(&qtu_events.delete_cmds),
atomic64_read(&qtu_events.iface_events),
atomic64_read(&qtu_events.match_calls),
+ atomic64_read(&qtu_events.match_calls_prepost),
atomic64_read(&qtu_events.match_found_sk),
atomic64_read(&qtu_events.match_found_sk_in_ct),
atomic64_read(
@@ -2125,7 +2270,9 @@ static int ctrl_cmd_tag(const char *input)
el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */
if (!el_socket) {
pr_info("qtaguid: ctrl_tag(%s): failed to lookup"
- " sock_fd=%d err=%d\n", input, sock_fd, res);
+ " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
+ input, sock_fd, res, current->pid, current->tgid,
+ current_fsuid());
goto err;
}
CT_DEBUG("qtaguid: ctrl_tag(%s): socket->...->f_count=%ld ->sk=%p\n",
@@ -2270,7 +2417,9 @@ static int ctrl_cmd_untag(const char *input)
el_socket = sockfd_lookup(sock_fd, &res); /* This locks the file */
if (!el_socket) {
pr_info("qtaguid: ctrl_untag(%s): failed to lookup"
- " sock_fd=%d err=%d\n", input, sock_fd, res);
+ " sock_fd=%d err=%d pid=%u tgid=%u uid=%u\n",
+ input, sock_fd, res, current->pid, current->tgid,
+ current_fsuid());
goto err;
}
CT_DEBUG("qtaguid: ctrl_untag(%s): socket->...->f_count=%ld ->sk=%p\n",
@@ -2346,6 +2495,9 @@ static int qtaguid_ctrl_parse(const char *input, int count)
char cmd;
int res;
+ CT_DEBUG("qtaguid: ctrl(%s): pid=%u tgid=%u uid=%u\n",
+ input, current->pid, current->tgid, current_fsuid());
+
cmd = input[0];
/* Collect params for commands */
switch (cmd) {
@@ -2522,9 +2674,12 @@ static int qtaguid_stats_proc_read(char *page, char **num_items_returned,
return len;
}
- CT_DEBUG("qtaguid:proc stats page=%p *num_items_returned=%p off=%ld "
- "char_count=%d *eof=%d\n", page, *num_items_returned,
- items_to_skip, char_count, *eof);
+ CT_DEBUG("qtaguid:proc stats pid=%u tgid=%u uid=%u "
+ "page=%p *num_items_returned=%p off=%ld "
+ "char_count=%d *eof=%d\n",
+ current->pid, current->tgid, current_fsuid(),
+ page, *num_items_returned,
+ items_to_skip, char_count, *eof);
if (*eof)
return 0;
diff --git a/net/netfilter/xt_qtaguid_internal.h b/net/netfilter/xt_qtaguid_internal.h
index 02479d6..d79f838 100644
--- a/net/netfilter/xt_qtaguid_internal.h
+++ b/net/netfilter/xt_qtaguid_internal.h
@@ -202,7 +202,8 @@ struct iface_stat {
/* net_dev is only valid for active iface_stat */
struct net_device *net_dev;
- struct byte_packet_counters totals[IFS_MAX_DIRECTIONS];
+ struct byte_packet_counters totals_via_dev[IFS_MAX_DIRECTIONS];
+ struct byte_packet_counters totals_via_skb[IFS_MAX_DIRECTIONS];
/*
* We keep the last_known, because some devices reset their counters
* just before NETDEV_UP, while some will reset just before
@@ -254,6 +255,8 @@ struct qtaguid_event_counts {
atomic64_t iface_events; /* Number of NETDEV_* events handled */
atomic64_t match_calls; /* Number of times iptables called mt */
+ /* Number of times iptables called mt from pre or post routing hooks */
+ atomic64_t match_calls_prepost;
/*
* match_found_sk_*: numbers related to the netfilter matching
* function finding a sock for the sk_buff.
diff --git a/net/netfilter/xt_qtaguid_print.c b/net/netfilter/xt_qtaguid_print.c
index 3917678..8cbd8e4 100644
--- a/net/netfilter/xt_qtaguid_print.c
+++ b/net/netfilter/xt_qtaguid_print.c
@@ -183,7 +183,11 @@ char *pp_iface_stat(struct iface_stat *is)
res = kasprintf(GFP_ATOMIC, "iface_stat@%p{"
"list=list_head{...}, "
"ifname=%s, "
- "total={rx={bytes=%llu, "
+ "total_dev={rx={bytes=%llu, "
+ "packets=%llu}, "
+ "tx={bytes=%llu, "
+ "packets=%llu}}, "
+ "total_skb={rx={bytes=%llu, "
"packets=%llu}, "
"tx={bytes=%llu, "
"packets=%llu}}, "
@@ -198,10 +202,14 @@ char *pp_iface_stat(struct iface_stat *is)
"tag_stat_tree=rb_root{...}}",
is,
is->ifname,
- is->totals[IFS_RX].bytes,
- is->totals[IFS_RX].packets,
- is->totals[IFS_TX].bytes,
- is->totals[IFS_TX].packets,
+ is->totals_via_dev[IFS_RX].bytes,
+ is->totals_via_dev[IFS_RX].packets,
+ is->totals_via_dev[IFS_TX].bytes,
+ is->totals_via_dev[IFS_TX].packets,
+ is->totals_via_skb[IFS_RX].bytes,
+ is->totals_via_skb[IFS_RX].packets,
+ is->totals_via_skb[IFS_TX].bytes,
+ is->totals_via_skb[IFS_TX].packets,
is->last_known_valid,
is->last_known[IFS_RX].bytes,
is->last_known[IFS_RX].packets,
--
1.7.9.5