Re: [Patch] fix packet loss and massive ping spikes with PPP multi-link

From: Richard Hartmann
Date: Wed Mar 31 2010 - 05:01:20 EST


Hi all,

this is our attempt at a cleaner version. It is still far from perfect,
and packets sometimes seem to gain one to two bytes in size, which means
you will run into "normal" fragmentation if you set your MTU to the
possible maximum (as you are then above that maximum). Still, it does
what it should, can be changed at run-time, and does not panic the
kernel.
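
Both knobs are ordinary module parameters registered with mode 0600, so
once ppp_generic is loaded they should appear under
/sys/module/ppp_generic/parameters/ and can be flipped by root on a
running system; that is what "changed at run-time" means above.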

Feedback appreciated, code even more so.


Thanks,
Richard

PS: The main patch is inline; both the main and the debug patch are
attached.


--- drivers/net/ppp_generic.c.orig 2010-03-25 16:56:05.000000000 +0100
+++ drivers/net/ppp_generic.c 2010-03-30 19:56:15.000000000 +0200
@@ -129,6 +129,7 @@
u32 nextseq; /* MP: seq no of next packet */
u32 minseq; /* MP: min of most recent seqnos */
struct sk_buff_head mrq; /* MP: receive reconstruction queue */
+ int rrsched; /* round robin scheduler for packet distribution */
#endif /* CONFIG_PPP_MULTILINK */
#ifdef CONFIG_PPP_FILTER
struct sock_filter *pass_filter; /* filter for packets to pass */
@@ -227,6 +228,17 @@
#define B 0x80 /* this fragment begins a packet */
#define E 0x40 /* this fragment ends a packet */

+#ifdef CONFIG_PPP_MULTILINK
+/* alternate fragmentation algorithm and multilink behaviour options added by uli.staerk@xxxxxxxxxxxxxx */
+static int ppp_ml_noexplode = 0;
+module_param(ppp_ml_noexplode, int, 0600);
+MODULE_PARM_DESC(ppp_ml_noexplode, "Set this to any other values than zero to avoid fragmentation over connected channels");
+
+static int ppp_ml_noheader = 0;
+module_param(ppp_ml_noheader, int, 0600);
+MODULE_PARM_DESC(ppp_ml_noheader, "Set this to any other value than zero to remove the ppp-multilink protocol header (pppoes.protocol not 0x003d) which enables fragmentation and reordering");
+#endif /* CONFIG_PPP_MULTILINK */
+
/* Compare multilink sequence numbers (assumed to be 32 bits wide) */
#define seq_before(a, b) ((s32)((a) - (b)) < 0)
#define seq_after(a, b) ((s32)((a) - (b)) > 0)
@@ -250,6 +262,7 @@
static void ppp_mp_insert(struct ppp *ppp, struct sk_buff *skb);
static struct sk_buff *ppp_mp_reconstruct(struct ppp *ppp);
static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb);
+static void ppp_mp_roundrobin(struct ppp *ppp, struct sk_buff *skb);
#endif /* CONFIG_PPP_MULTILINK */
static int ppp_set_compress(struct ppp *ppp, unsigned long arg);
static void ppp_ccp_peek(struct ppp *ppp, struct sk_buff *skb, int inbound);
@@ -1292,10 +1305,18 @@
}

#ifdef CONFIG_PPP_MULTILINK
- /* Multilink: fragment the packet over as many links
- as can take the packet at the moment. */
- if (!ppp_mp_explode(ppp, skb))
+ /* send packet without multilink header */
+ if(ppp_ml_noheader) {
+ ppp_mp_roundrobin(ppp, skb);
return;
+ }
+ else {
+ /* Multilink: fragment the packet over as many links
+ as can take the packet at the moment. */
+ if (!ppp_mp_explode(ppp, skb)) {
+ return;
+ }
+ }
#endif /* CONFIG_PPP_MULTILINK */

ppp->xmit_pending = NULL;
@@ -1304,6 +1325,38 @@

#ifdef CONFIG_PPP_MULTILINK
/*
+ * Send packet through the next channel (round robin)
+ */
+static void ppp_mp_roundrobin(struct ppp *ppp, struct sk_buff *skb)
+{
+ int i;
+ struct channel *pch;
+
+ ppp->rrsched++;
+ i = 0;
+ list_for_each_entry(pch, &ppp->channels, clist) {
+ if(pch->chan == NULL) continue;
+
+ if (ppp->rrsched % ppp->n_channels == i) {
+ spin_lock_bh(&pch->downl);
+ if (pch->chan) {
+ if (pch->chan->ops->start_xmit(pch->chan, skb)) {
+ ppp->xmit_pending = NULL;
+ }
+ } else {
+ /* channel got unregistered */
+ kfree_skb(skb);
+ ppp->xmit_pending = NULL;
+ }
+ spin_unlock_bh(&pch->downl);
+ return;
+ }
+ i++;
+ }
+ return;
+}
+
+/*
* Divide a packet to be transmitted into fragments and
* send them out the individual links.
*/
@@ -1352,13 +1405,21 @@
}
++i;
}
- /*
- * Don't start sending this packet unless at least half of
- * the channels are free. This gives much better TCP
- * performance if we have a lot of channels.
- */
- if (nfree == 0 || nfree < navail / 2)
- return 0; /* can't take now, leave it in xmit_pending */
+
+
+ if(ppp_ml_noexplode) {
+ }
+ else {
+ /*
+ * Don't start sending this packet unless at least half of
+ * the channels are free. This gives much better TCP
+ * performance if we have a lot of channels.
+ */
+ if (nfree == 0 || nfree < navail / 2) {
+ return 0; /* can't take now, leave it in xmit_pending */
+
+ }
+ }

/* Do protocol field compression (XXX this should be optional) */
p = skb->data;
@@ -1371,6 +1432,7 @@
totlen = len;
nbigger = len % nfree;

+
/* skip to the channel after the one we last used
and start at that one */
list = &ppp->channels;
@@ -1432,33 +1494,40 @@
*of the channel we are going to transmit on
*/
flen = len;
- if (nfree > 0) {
- if (pch->speed == 0) {
- flen = totlen/nfree ;
- if (nbigger > 0) {
- flen++;
- nbigger--;
- }
- } else {
- flen = (((totfree - nzero)*(totlen + hdrlen*totfree)) /
- ((totspeed*totfree)/pch->speed)) - hdrlen;
- if (nbigger > 0) {
- flen += ((totfree - nzero)*pch->speed)/totspeed;
- nbigger -= ((totfree - nzero)*pch->speed)/
- totspeed;
+
+ if(ppp_ml_noexplode) {
+ nfree--;
+ }
+ else {
+ if (nfree > 0) {
+ if (pch->speed == 0) {
+ flen = totlen/nfree ;
+ if (nbigger > 0) {
+ flen++;
+ nbigger--;
+ }
+ } else {
+ flen = (((totfree - nzero)*(totlen + hdrlen*totfree)) /
+ ((totspeed*totfree)/pch->speed)) - hdrlen;
+ if (nbigger > 0) {
+ flen += ((totfree - nzero)*pch->speed)/totspeed;
+ nbigger -= ((totfree - nzero)*pch->speed)/
+ totspeed;
+ }
}
+ nfree--;
}
- nfree--;
+ /*
+ *check if we are on the last channel or
+ *we exceded the lenght of the data to
+ *fragment
+ */
+ if ((nfree <= 0) || (flen > len))
+ flen = len;
+
}

/*
- *check if we are on the last channel or
- *we exceded the lenght of the data to
- *fragment
- */
- if ((nfree <= 0) || (flen > len))
- flen = len;
- /*
*it is not worth to tx on slow channels:
*in that case from the resulting flen according to the
*above formula will be equal or less than zero.
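
For anyone who wants to poke at the round-robin logic without loading the
module, here is a minimal userspace sketch of the channel selection that
ppp_mp_roundrobin() performs (a toy model, not the kernel path: the
'registered' flag stands in for pch->chan != NULL and the channel count is
fixed). If I read the patch correctly, ppp_ml_noexplode keeps the
multilink framing but skips the proportional split across free channels
(fragments are then only cut at the channel MTU), while ppp_ml_noheader
bypasses ppp_mp_explode() entirely and hands each packet, unfragmented and
without the 0x003d multilink header, to one channel picked round-robin.

/*
 * Toy model of the selection in ppp_mp_roundrobin(); builds standalone
 * with any C compiler.  A channel that is still on the list but whose
 * chan pointer was already cleared is modelled by registered == 0.
 */
#include <stdio.h>

#define N_CHANNELS 3

struct toy_channel {
	int id;
	int registered;		/* stands in for pch->chan != NULL */
	int sent;		/* packets this channel transmitted */
};

/* return the index of the channel picked for this packet, -1 if none */
static int rr_pick(struct toy_channel ch[], int n_channels, int *rrsched)
{
	int i = 0;

	(*rrsched)++;
	for (int c = 0; c < n_channels; c++) {
		if (!ch[c].registered)
			continue;	/* skipped entries do not advance i */
		if (*rrsched % n_channels == i) {
			ch[c].sent++;
			return c;
		}
		i++;
	}
	return -1;		/* no channel matched this round */
}

int main(void)
{
	struct toy_channel ch[N_CHANNELS] = {
		{ .id = 0, .registered = 1 },
		{ .id = 1, .registered = 0 },	/* being unregistered */
		{ .id = 2, .registered = 1 },
	};
	int rrsched = 0, unmatched = 0;

	for (int pkt = 0; pkt < 9; pkt++)
		if (rr_pick(ch, N_CHANNELS, &rrsched) < 0)
			unmatched++;

	for (int c = 0; c < N_CHANNELS; c++)
		printf("channel %d sent %d packets\n", ch[c].id, ch[c].sent);
	printf("%d packets matched no channel\n", unmatched);
	return 0;
}

Because i is only incremented for entries whose chan pointer is still set
while the modulo is taken over ppp->n_channels, a round whose index lands
on such a half-unregistered entry matches no channel; in the patch the skb
then appears to stay in xmit_pending until the next push.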
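
The debug patch below is the same change with printk() calls at KERN_ERR
sprinkled through the transmit path; relative to the main patch it only
adds logging, and the " RR ..." / " ML ..." lines can be followed in dmesg
(or the kernel log) while testing.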
--- drivers/net/ppp_generic.c.orig 2010-03-25 16:56:05.000000000 +0100
+++ drivers/net/ppp_generic.c.patched_with_debug 2010-03-30 20:03:31.000000000 +0200
@@ -129,6 +129,7 @@
u32 nextseq; /* MP: seq no of next packet */
u32 minseq; /* MP: min of most recent seqnos */
struct sk_buff_head mrq; /* MP: receive reconstruction queue */
+ int rrsched; /* round robin scheduler for packet distribution */
#endif /* CONFIG_PPP_MULTILINK */
#ifdef CONFIG_PPP_FILTER
struct sock_filter *pass_filter; /* filter for packets to pass */
@@ -227,6 +228,17 @@
#define B 0x80 /* this fragment begins a packet */
#define E 0x40 /* this fragment ends a packet */

+#ifdef CONFIG_PPP_MULTILINK
+/* alternate fragmentation algorithm and multilink behaviour options added by uli.staerk@xxxxxxxxxxxxxx */
+static int ppp_ml_noexplode = 0;
+module_param(ppp_ml_noexplode, int, 0600);
+MODULE_PARM_DESC(ppp_ml_noexplode, "Set this to any other values than zero to avoid fragmentation over connected channels");
+
+static int ppp_ml_noheader = 0;
+module_param(ppp_ml_noheader, int, 0600);
+MODULE_PARM_DESC(ppp_ml_noheader, "Set this to any other value than zero to remove the ppp-multilink protocol header (pppoes.protocol not 0x003d) which enables fragmentation and reordering");
+#endif /* CONFIG_PPP_MULTILINK */
+
/* Compare multilink sequence numbers (assumed to be 32 bits wide) */
#define seq_before(a, b) ((s32)((a) - (b)) < 0)
#define seq_after(a, b) ((s32)((a) - (b)) > 0)
@@ -250,6 +262,7 @@
static void ppp_mp_insert(struct ppp *ppp, struct sk_buff *skb);
static struct sk_buff *ppp_mp_reconstruct(struct ppp *ppp);
static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb);
+static void ppp_mp_roundrobin(struct ppp *ppp, struct sk_buff *skb);
#endif /* CONFIG_PPP_MULTILINK */
static int ppp_set_compress(struct ppp *ppp, unsigned long arg);
static void ppp_ccp_peek(struct ppp *ppp, struct sk_buff *skb, int inbound);
@@ -1273,6 +1286,7 @@
return;
}

+printk(KERN_ERR "send packet\n");
if ((ppp->flags & SC_MULTILINK) == 0) {
/* not doing multilink: send it down the first channel */
list = list->next;
@@ -1292,11 +1306,24 @@
}

#ifdef CONFIG_PPP_MULTILINK
- /* Multilink: fragment the packet over as many links
- as can take the packet at the moment. */
- if (!ppp_mp_explode(ppp, skb))
+ /* send packet without multilink header */
+ if(ppp_ml_noheader) {
+printk(KERN_ERR "BEGIN SEND RR\n");
+ ppp_mp_roundrobin(ppp, skb);
return;
+printk(KERN_ERR "END SEND RR\n");
+ }
+ else {
+printk(KERN_ERR "BEGIN SEND MULTILINK\n");
+ /* Multilink: fragment the packet over as many links
+ as can take the packet at the moment. */
+ if (!ppp_mp_explode(ppp, skb)) {
+printk(KERN_ERR "END SEND MULTILINK\n");
+ return;
+ }
+ }
#endif /* CONFIG_PPP_MULTILINK */
+printk(KERN_ERR "END SEND DROP PACKET\n");

ppp->xmit_pending = NULL;
kfree_skb(skb);
@@ -1304,6 +1331,41 @@

#ifdef CONFIG_PPP_MULTILINK
/*
+ * Send packet through the next channel (round robin)
+ */
+static void ppp_mp_roundrobin(struct ppp *ppp, struct sk_buff *skb)
+{
+ int i;
+ struct channel *pch;
+
+ ppp->rrsched++;
+printk(KERN_ERR " RR counter=%d, len=%d, devmtu=%d\n", ppp->rrsched, skb->len, ppp->dev->mtu);
+ i = 0;
+ list_for_each_entry(pch, &ppp->channels, clist) {
+ if(pch->chan == NULL) continue;
+
+ if (ppp->rrsched % ppp->n_channels == i) {
+printk(KERN_ERR " RR send via %d, chmtu=%d\n", i, pch->chan->mtu);
+ spin_lock_bh(&pch->downl);
+ if (pch->chan) {
+ if (pch->chan->ops->start_xmit(pch->chan, skb)) {
+ ppp->xmit_pending = NULL;
+ }
+ } else {
+printk(KERN_ERR " RR dropped at %d\n", i);
+ /* channel got unregistered */
+ kfree_skb(skb);
+ ppp->xmit_pending = NULL;
+ }
+ spin_unlock_bh(&pch->downl);
+ return;
+ }
+ i++;
+ }
+ return;
+}
+
+/*
* Divide a packet to be transmitted into fragments and
* send them out the individual links.
*/
@@ -1352,13 +1414,25 @@
}
++i;
}
- /*
- * Don't start sending this packet unless at least half of
- * the channels are free. This gives much better TCP
- * performance if we have a lot of channels.
- */
- if (nfree == 0 || nfree < navail / 2)
- return 0; /* can't take now, leave it in xmit_pending */
+
+printk(KERN_ERR " ML nfree=%d, navail=%d, nzero=%d, totfree=%d, totspeed=%d\n", nfree,navail,nzero,totfree,totspeed);
+
+ if(ppp_ml_noexplode) {
+printk(KERN_ERR " ML no explode A\n");
+ }
+ else {
+printk(KERN_ERR " ML explode A\n");
+ /*
+ * Don't start sending this packet unless at least half of
+ * the channels are free. This gives much better TCP
+ * performance if we have a lot of channels.
+ */
+ if (nfree == 0 || nfree < navail / 2) {
+printk(KERN_ERR " ML wait A\n");
+ return 0; /* can't take now, leave it in xmit_pending */
+
+ }
+ }

/* Do protocol field compression (XXX this should be optional) */
p = skb->data;
@@ -1371,6 +1445,8 @@
totlen = len;
nbigger = len % nfree;

+printk(KERN_ERR " ML len=%d, totlen=%d, nbigger=%d\n", len, totlen, nbigger);
+
/* skip to the channel after the one we last used
and start at that one */
list = &ppp->channels;
@@ -1381,10 +1457,12 @@
break;
}
}
+printk(KERN_ERR " ML skip to channel=%d\n", i);

/* create a fragment for each channel */
bits = B;
while (len > 0) {
+printk(KERN_ERR " ML while len=%d, i=%d\n", len, i);
list = list->next;
if (list == &ppp->channels) {
i = 0;
@@ -1432,45 +1510,58 @@
*of the channel we are going to transmit on
*/
flen = len;
- if (nfree > 0) {
- if (pch->speed == 0) {
- flen = totlen/nfree ;
- if (nbigger > 0) {
- flen++;
- nbigger--;
- }
- } else {
- flen = (((totfree - nzero)*(totlen + hdrlen*totfree)) /
- ((totspeed*totfree)/pch->speed)) - hdrlen;
- if (nbigger > 0) {
- flen += ((totfree - nzero)*pch->speed)/totspeed;
- nbigger -= ((totfree - nzero)*pch->speed)/
- totspeed;
+printk(KERN_ERR " ML fragmentlen=%d\n", flen);
+
+ if(ppp_ml_noexplode) {
+printk(KERN_ERR " ML no explode B \n");
+ nfree--;
+ }
+ else {
+printk(KERN_ERR " ML explode B \n");
+ if (nfree > 0) {
+ if (pch->speed == 0) {
+ flen = totlen/nfree ;
+ if (nbigger > 0) {
+ flen++;
+ nbigger--;
+ }
+ } else {
+ flen = (((totfree - nzero)*(totlen + hdrlen*totfree)) /
+ ((totspeed*totfree)/pch->speed)) - hdrlen;
+ if (nbigger > 0) {
+ flen += ((totfree - nzero)*pch->speed)/totspeed;
+ nbigger -= ((totfree - nzero)*pch->speed)/
+ totspeed;
+ }
}
+ nfree--;
}
- nfree--;
+ /*
+ *check if we are on the last channel or
+ *we exceded the lenght of the data to
+ *fragment
+ */
+ if ((nfree <= 0) || (flen > len))
+ flen = len;
+
+printk(KERN_ERR " ML new fragmentlen=%d\n", flen);
}

/*
- *check if we are on the last channel or
- *we exceded the lenght of the data to
- *fragment
- */
- if ((nfree <= 0) || (flen > len))
- flen = len;
- /*
*it is not worth to tx on slow channels:
*in that case from the resulting flen according to the
*above formula will be equal or less than zero.
*Skip the channel in this case
*/
if (flen <= 0) {
+printk(KERN_ERR " ML fragmentlen is zero\n");
pch->avail = 2;
spin_unlock_bh(&pch->downl);
continue;
}

mtu = pch->chan->mtu - hdrlen;
+printk(KERN_ERR " ML mtu=%d (chan-mtu=%d)\n", mtu, pch->chan->mtu);
if (mtu < 4)
mtu = 4;
if (flen > mtu)
@@ -1502,6 +1593,7 @@
if (!skb_queue_empty(&pch->file.xq) ||
!chan->ops->start_xmit(chan, frag))
skb_queue_tail(&pch->file.xq, frag);
+printk(KERN_ERR " ML sent packet with seq: %d\n", ppp->nxseq);
pch->had_frag = 1;
p += flen;
len -= flen;
@@ -1510,6 +1602,7 @@
spin_unlock_bh(&pch->downl);
}
ppp->nxchan = i;
+printk(KERN_ERR " ML END SEND\n");

return 1;