[PATCH NET-NEXT 04/10] net: infrastructure for hardware time stamping

From: Patrick Ohly
Date: Thu Feb 12 2009 - 10:06:45 EST


The additional per-packet information (16 bytes for time stamps, 1
byte for flags) is stored for all packets in the skb_shared_info
struct. This implementation detail is hidden from users of that
information via skb_* accessor functions. A separate struct resp.
union is used for the additional information so that it can be
stored/copied easily outside of skb_shared_info.

Compared to previous implementations (reusing the tstamp field
depending on the context, optional additional structures) this
is the simplest solution. It does not extend sk_buff itself.

TX time stamping is implemented in software if the device driver
doesn't support hardware time stamping.

The new semantic for hardware/software time stamping around
ndo_start_xmit() is based on two assumptions about existing
network device drivers which don't support hardware time
stamping and know nothing about it:
- they leave the new skb_shared_tx unmodified
- the keep the connection to the originating socket in skb->sk
alive, i.e., don't call skb_orphan()

Given that skb_shared_tx is new, the first assumption is safe.
The second is only true for some drivers. As a result, software
TX time stamping currently works with the bnx2 driver, but not
with the unmodified igb driver (the two drivers this patch series
was tested with).

Signed-off-by: Patrick Ohly <patrick.ohly@xxxxxxxxx>
---
include/linux/skbuff.h | 91 +++++++++++++++++++++++++++++++++++++++++++++++-
net/core/dev.c | 32 ++++++++++++++++-
net/core/skbuff.c | 41 +++++++++++++++++++++
3 files changed, 161 insertions(+), 3 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9247008..f96bc91 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -132,6 +132,57 @@ struct skb_frag_struct {
__u32 size;
};

+#define HAVE_HW_TIME_STAMP
+
+/**
+ * skb_shared_hwtstamps - hardware time stamps
+ *
+ * @hwtstamp: hardware time stamp transformed into duration
+ * since arbitrary point in time
+ * @syststamp: hwtstamp transformed to system time base
+ *
+ * Software time stamps generated by ktime_get_real() are stored in
+ * skb->tstamp. The relation between the different kinds of time
+ * stamps is as follows:
+ *
+ * syststamp and tstamp can be compared against each other in
+ * arbitrary combinations. The accuracy of a
+ * syststamp/tstamp/"syststamp from other device" comparison is
+ * limited by the accuracy of the transformation into system time
+ * base. This depends on the device driver and its underlying
+ * hardware.
+ *
+ * hwtstamps can only be compared against other hwtstamps from
+ * the same device.
+ *
+ * This structure is attached to packets as part of the
+ * &skb_shared_info. Use skb_hwtstamps() to get a pointer.
+ */
+struct skb_shared_hwtstamps {
+ ktime_t hwtstamp;
+ ktime_t syststamp;
+};
+
+/**
+ * skb_shared_tx - instructions for time stamping of outgoing packets
+ *
+ * @hardware: generate hardware time stamp
+ * @software: generate software time stamp
+ * @in_progress: device driver is going to provide
+ * hardware time stamp
+ *
+ * These flags are attached to packets as part of the
+ * &skb_shared_info. Use skb_tx() to get a pointer.
+ */
+union skb_shared_tx {
+ struct {
+ __u8 hardware:1,
+ software:1,
+ in_progress:1;
+ };
+ __u8 flags;
+};
+
/* This data is invariant across clones and lives at
* the end of the header data, ie. at skb->end.
*/
@@ -143,10 +194,12 @@ struct skb_shared_info {
unsigned short gso_segs;
unsigned short gso_type;
__be32 ip6_frag_id;
+ union skb_shared_tx tx_flags;
#ifdef CONFIG_HAS_DMA
unsigned int num_dma_maps;
#endif
struct sk_buff *frag_list;
+ struct skb_shared_hwtstamps hwtstamps;
skb_frag_t frags[MAX_SKB_FRAGS];
#ifdef CONFIG_HAS_DMA
dma_addr_t dma_maps[MAX_SKB_FRAGS + 1];
@@ -465,6 +518,16 @@ static inline unsigned char *skb_end_pointer(const struct sk_buff *skb)
/* Internal */
#define skb_shinfo(SKB) ((struct skb_shared_info *)(skb_end_pointer(SKB)))

+static inline struct skb_shared_hwtstamps *skb_hwtstamps(struct sk_buff *skb)
+{
+ return &skb_shinfo(skb)->hwtstamps;
+}
+
+static inline union skb_shared_tx *skb_tx(struct sk_buff *skb)
+{
+ return &skb_shinfo(skb)->tx_flags;
+}
+
/**
* skb_queue_empty - check if a queue is empty
* @list: queue head
@@ -1730,6 +1793,11 @@ static inline void skb_copy_to_linear_data_offset(struct sk_buff *skb,

extern void skb_init(void);

+static inline ktime_t skb_get_ktime(const struct sk_buff *skb)
+{
+ return skb->tstamp;
+}
+
/**
* skb_get_timestamp - get timestamp from a skb
* @skb: skb to get stamp from
@@ -1739,11 +1807,18 @@ extern void skb_init(void);
* This function converts the offset back to a struct timeval and stores
* it in stamp.
*/
-static inline void skb_get_timestamp(const struct sk_buff *skb, struct timeval *stamp)
+static inline void skb_get_timestamp(const struct sk_buff *skb,
+ struct timeval *stamp)
{
*stamp = ktime_to_timeval(skb->tstamp);
}

+static inline void skb_get_timestampns(const struct sk_buff *skb,
+ struct timespec *stamp)
+{
+ *stamp = ktime_to_timespec(skb->tstamp);
+}
+
static inline void __net_timestamp(struct sk_buff *skb)
{
skb->tstamp = ktime_get_real();
@@ -1759,6 +1834,20 @@ static inline ktime_t net_invalid_timestamp(void)
return ktime_set(0, 0);
}

+/**
+ * skb_tstamp_tx - queue clone of skb with send time stamps
+ * @orig_skb: the original outgoing packet
+ * @hwtstamps: hardware time stamps, may be NULL if not available
+ *
+ * If the skb has a socket associated, then this function clones the
+ * skb (thus sharing the actual data and optional structures), stores
+ * the optional hardware time stamping information (if non NULL) or
+ * generates a software time stamp (otherwise), then queues the clone
+ * to the error queue of the socket. Errors are silently ignored.
+ */
+extern void skb_tstamp_tx(struct sk_buff *orig_skb,
+ struct skb_shared_hwtstamps *hwtstamps);
+
extern __sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len);
extern __sum16 __skb_checksum_complete(struct sk_buff *skb);

diff --git a/net/core/dev.c b/net/core/dev.c
index 1e27a67..d20c28e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1672,10 +1672,21 @@ static int dev_gso_segment(struct sk_buff *skb)
return 0;
}

+static void tstamp_tx(struct sk_buff *skb)
+{
+ union skb_shared_tx *shtx =
+ skb_tx(skb);
+ if (unlikely(shtx->software &&
+ !shtx->in_progress)) {
+ skb_tstamp_tx(skb, NULL);
+ }
+}
+
int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
struct netdev_queue *txq)
{
const struct net_device_ops *ops = dev->netdev_ops;
+ int rc;

prefetch(&dev->netdev_ops->ndo_start_xmit);
if (likely(!skb->next)) {
@@ -1689,13 +1700,29 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
goto gso;
}

- return ops->ndo_start_xmit(skb, dev);
+ rc = ops->ndo_start_xmit(skb, dev);
+ /*
+ * TODO: if skb_orphan() was called by
+ * dev->hard_start_xmit() (for example, the unmodified
+ * igb driver does that; bnx2 doesn't), then
+ * skb_tx_software_timestamp() will be unable to send
+ * back the time stamp.
+ *
+ * How can this be prevented? Always create another
+ * reference to the socket before calling
+ * dev->hard_start_xmit()? Prevent that skb_orphan()
+ * does anything in dev->hard_start_xmit() by clearing
+ * the skb destructor before the call and restoring it
+ * afterwards, then doing the skb_orphan() ourselves?
+ */
+ if (likely(!rc))
+ tstamp_tx(skb);
+ return rc;
}

gso:
do {
struct sk_buff *nskb = skb->next;
- int rc;

skb->next = nskb->next;
nskb->next = NULL;
@@ -1705,6 +1732,7 @@ gso:
skb->next = nskb;
return rc;
}
+ tstamp_tx(skb);
if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
return NETDEV_TX_BUSY;
} while (skb->next);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7657cec..7b831a7 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -55,6 +55,7 @@
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/scatterlist.h>
+#include <linux/errqueue.h>

#include <net/protocol.h>
#include <net/dst.h>
@@ -215,7 +216,9 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
shinfo->gso_segs = 0;
shinfo->gso_type = 0;
shinfo->ip6_frag_id = 0;
+ shinfo->tx_flags.flags = 0;
shinfo->frag_list = NULL;
+ memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));

if (fclone) {
struct sk_buff *child = skb + 1;
@@ -2940,6 +2943,44 @@ int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
}
EXPORT_SYMBOL_GPL(skb_cow_data);

+void skb_tstamp_tx(struct sk_buff *orig_skb,
+ struct skb_shared_hwtstamps *hwtstamps)
+{
+ struct sock *sk = orig_skb->sk;
+ struct sock_exterr_skb *serr;
+ struct sk_buff *skb;
+ int err;
+
+ if (!sk)
+ return;
+
+ skb = skb_clone(orig_skb, GFP_ATOMIC);
+ if (!skb)
+ return;
+
+ if (hwtstamps) {
+ *skb_hwtstamps(skb) =
+ *hwtstamps;
+ } else {
+ /*
+ * no hardware time stamps available,
+ * so keep the skb_shared_tx and only
+ * store software time stamp
+ */
+ skb->tstamp = ktime_get_real();
+ }
+
+ serr = SKB_EXT_ERR(skb);
+ memset(serr, 0, sizeof(*serr));
+ serr->ee.ee_errno = ENOMSG;
+ serr->ee.ee_origin = SO_EE_ORIGIN_TIMESTAMPING;
+ err = sock_queue_err_skb(sk, skb);
+ if (err)
+ kfree_skb(skb);
+}
+EXPORT_SYMBOL_GPL(skb_tstamp_tx);
+
+
/**
* skb_partial_csum_set - set up and verify partial csum values for packet
* @skb: the skb to set
--
1.6.1.2

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/