2.0.0 network patch kit

Alan Cox (alan@lxorguk.ukuu.org.uk)
Fri, 28 Jun 96 23:08 BST


* This diff contains the following fixes from 2.0.0 intended for 2.0.1
*
* o Document right C++ library [Cosmetic]
* o syscall_count exported to modules for SMP nfs module [Cosmetic]
* o Baycom 9600 baud modem driver fixed [Serious]
* o Softdog doesn't lock up if compiled with nowayout when
* closed and re-opened [Serious]
* o Ethernet control refuses to let anyone set mtu > 1500 [Cosmetic]
* o NFS RPC handling fixes, also Solaris 2.5 "funny" workaround [Serious]
* o SMP scheduling accounting fixed [Cosmetic]
* o Vmscan swapper fix to the 5 second pause bug [Serious]
* o Firewall no longer enforces the minimum size of iphdr + 8 bytes
* for protocols other than TCP and UDP [Cosmetic]
* o Masquerading works with PPP [Serious]
* o panic skb_push: when a route shifts from slip to other
* networks during a retransmission [Critical]
* o Two races on mtu discovery fixed (TCP: **bug** "copy" <= 0) [Critical]
* o Ack deadlock under extreme load fixed [Cosmetic]
* o Socket accounting fix for accept()'ed socket [Cosmetic]
* o Incorrect handling of raw packets when IP_HDRINCL is set
* and the target is multicast or broadcast. (Broke mrouted)
* [Serious]
*
* These are the main problems that have shown up in the parts of 2.0
* I am handling. If you apply this diff remember to remove it before
* applying the 2.0.1 patch once released. You can do this using the -R
* option to patch.
*
* Alan.

diff --unified --recursive --new-file --exclude-from exclude linux.vanilla/Documentation/Changes linux/Documentation/Changes
--- linux.vanilla/Documentation/Changes Sun Jun 9 13:13:19 1996
+++ linux/Documentation/Changes Mon Jun 17 22:44:01 1996
@@ -491,8 +491,13 @@
Linux C++ Library
=================

+ftp://sunsite.unc.edu/pub/Linux/GCC/libg++-2.7.1.3.bin.tar.gz
ftp://sunsite.unc.edu/pub/Linux/GCC/libg++-2.7.1.4.bin.tar.gz
+
+Use libc5.2.18 with 2.7.1.3, libc5.3.12 with 2.7.1.4
+
Installation notes:
+ftp://sunsite.unc.edu/pub/Linux/GCC/release.libg++-2.7.1.3
ftp://sunsite.unc.edu/pub/Linux/GCC/release.libg++-2.7.1.4

Dynamic Linker
diff --unified --recursive --new-file --exclude-from exclude linux.vanilla/arch/i386/kernel/ksyms.c linux/arch/i386/kernel/ksyms.c
--- linux.vanilla/arch/i386/kernel/ksyms.c Sun Jun 9 13:12:53 1996
+++ linux/arch/i386/kernel/ksyms.c Mon Jun 17 23:02:23 1996
@@ -14,6 +14,7 @@
#ifdef __SMP__
X(apic_reg), /* Needed internally for the I386 inlines */
X(cpu_data),
+ X(syscall_count),
#endif
#include <linux/symtab_end.h>
};
diff --unified --recursive --new-file --exclude-from exclude linux.vanilla/drivers/char/baycom.c linux/drivers/char/baycom.c
--- linux.vanilla/drivers/char/baycom.c Sun Jun 9 13:12:25 1996
+++ linux/drivers/char/baycom.c Mon Jun 17 20:24:21 1996
@@ -236,7 +236,6 @@
unsigned int dcd_shreg;
unsigned long descram;
unsigned long scram;
- unsigned char last_rxbit;
};

struct modem_state {
@@ -581,8 +580,7 @@

/* ---------------------------------------------------------------------- */
/*
- * The HDLC routines could be more efficient; they could take more than
- * one bit per call
+ * The HDLC routines
*/

static inline int hdlc_rx_add_bytes(struct baycom_state *bc,
@@ -1212,7 +1210,7 @@
{
register struct baycom_state *bc = (struct baycom_state *)dev_id;
int i;
- unsigned int data, rawdata, mask, mask2;
+ unsigned int data, mask, mask2;

if (!bc || bc->magic != BAYCOM_MAGIC)
return;
@@ -1270,7 +1268,7 @@
/*
* do receiver; differential decode and descramble on the fly
*/
- for(rawdata = data = i = 0; i < PAR96_BURSTBITS; i++) {
+ for(data = i = 0; i < PAR96_BURSTBITS; i++) {
unsigned int descx;
bc->modem.par96.descram = (bc->modem.par96.descram << 1);
if (inb(LPT_STATUS(bc->iobase)) & PAR96_RXBIT)
@@ -1281,14 +1279,9 @@
outb(PAR97_POWER | PAR96_PTT, LPT_DATA(bc->iobase));
descx ^= ((descx >> PAR96_DESCRAM_TAPSH1) ^
(descx >> PAR96_DESCRAM_TAPSH2));
- if (descx & 1)
- bc->modem.par96.last_rxbit =
- !bc->modem.par96.last_rxbit;
data >>= 1;
- if (bc->modem.par96.last_rxbit)
+ if (!(descx & 1))
data |= 0x8000;
- rawdata <<= 1;
- rawdata |= !(descx & 1);
outb(PAR97_POWER | PAR96_PTT | PAR96_BURST,
LPT_DATA(bc->iobase));
}
@@ -1304,16 +1297,16 @@
* do DCD algorithm
*/
if (bc->options & BAYCOM_OPTIONS_SOFTDCD) {
- bc->modem.par96.dcd_shreg = (bc->modem.par96.dcd_shreg << 16)
- | rawdata;
+ bc->modem.par96.dcd_shreg = (bc->modem.par96.dcd_shreg >> 16)
+ | (data << 16);
/* search for flags and set the dcd counter appropriately */
- for(mask = 0x7f8000, mask2 = 0x3f0000, i = 0;
- i < PAR96_BURSTBITS; i++, mask >>= 1, mask2 >>= 1)
+ for(mask = 0x1fe00, mask2 = 0xfc00, i = 0;
+ i < PAR96_BURSTBITS; i++, mask <<= 1, mask2 <<= 1)
if ((bc->modem.par96.dcd_shreg & mask) == mask2)
bc->modem.par96.dcd_count = BAYCOM_MAXFLEN+4;
/* check for abort/noise sequences */
- for(mask = 0x3f8000, mask2 = 0x3f8000, i = 0;
- i < PAR96_BURSTBITS; i++, mask >>= 1, mask2 >>= 1)
+ for(mask = 0x1fe00, mask2 = 0x1fe00, i = 0;
+ i < PAR96_BURSTBITS; i++, mask <<= 1, mask2 <<= 1)
if ((bc->modem.par96.dcd_shreg & mask) == mask2)
if (bc->modem.par96.dcd_count >= 0)
bc->modem.par96.dcd_count -=
diff --unified --recursive --new-file --exclude-from exclude linux.vanilla/drivers/char/softdog.c linux/drivers/char/softdog.c
--- linux.vanilla/drivers/char/softdog.c Sun Jun 9 13:12:22 1996
+++ linux/drivers/char/softdog.c Thu Jun 20 20:48:52 1996
@@ -74,6 +74,7 @@
* Activate timer
*/
watchdog_ticktock.expires=jiffies + (soft_margin * HZ);
+ del_timer(&watchdog_ticktock);
add_timer(&watchdog_ticktock);
timer_alive++;
return 0;
diff --unified --recursive --new-file --exclude-from exclude linux.vanilla/drivers/net/net_init.c linux/drivers/net/net_init.c
--- linux.vanilla/drivers/net/net_init.c Sun Jun 9 13:12:08 1996
+++ linux/drivers/net/net_init.c Mon Jun 17 20:25:46 1996
@@ -20,6 +20,8 @@
Use dev_close cleanly so we always shut things down tidily.

Changed 29/10/95, Alan Cox to pass sockaddr's around for mac addresses.
+
+ 14/06/96 - Paul Gortmaker: Add generic eth_change_mtu() function.
*/

#include <linux/config.h>
@@ -144,6 +146,14 @@
return 0;
}

+static int eth_change_mtu(struct device *dev, int new_mtu)
+{
+ if ((new_mtu < 68) || (new_mtu > 1500))
+ return -EINVAL;
+ dev->mtu = new_mtu;
+ return 0;
+}
+
void ether_setup(struct device *dev)
{
int i;
@@ -165,6 +175,7 @@
}
}

+ dev->change_mtu = eth_change_mtu;
dev->hard_header = eth_header;
dev->rebuild_header = eth_rebuild_header;
dev->set_mac_address = eth_mac_addr;
diff --unified --recursive --new-file --exclude-from exclude linux.vanilla/fs/nfs/bio.c linux/fs/nfs/bio.c
--- linux.vanilla/fs/nfs/bio.c Sun Jun 9 13:11:31 1996
+++ linux/fs/nfs/bio.c Mon Jun 17 20:26:26 1996
@@ -16,6 +16,9 @@
* Another possible solution to this problem may be to have a cache of recent
* RPC call results indexed by page pointer, or even a result code field
* in struct page.
+ *
+ * June 96: Added retries of RPCs that seem to have failed for a transient
+ * reason.
*/

#include <linux/sched.h>
@@ -90,64 +93,114 @@
}

/*
+ * This is the function to (re-) transmit an NFS readahead request
+ */
+static int
+nfsiod_read_setup(struct nfsiod_req *req)
+{
+ struct inode *inode = req->rq_inode;
+ struct page *page = req->rq_page;
+
+ return nfs_proc_read_request(&req->rq_rpcreq,
+ NFS_SERVER(inode), NFS_FH(inode),
+ page->offset, PAGE_SIZE,
+ (__u32 *) page_address(page));
+}
+
+/*
* This is the callback from nfsiod telling us whether a reply was
* received or some error occurred (timeout or socket shutdown).
*/
-static void
-nfs_read_cb(int result, struct nfsiod_req *req)
+static int
+nfsiod_read_result(int result, struct nfsiod_req *req)
{
- struct page *page = (struct page *) req->rq_cdata;
+ struct nfs_server *server = NFS_SERVER(req->rq_inode);
+ struct page *page = req->rq_page;
static int succ = 0, fail = 0;
+ int i;

dprintk("BIO: received callback for page %p, result %d\n",
page, result);

- if (result >= 0
- && (result = nfs_proc_read_reply(&req->rq_rpcreq)) >= 0) {
- succ++;
+ if (result >= 0) {
+ struct nfs_fattr fattr;
+
+ result = nfs_proc_read_reply(&req->rq_rpcreq, &fattr);
+ if (result >= 0) {
+ nfs_refresh_inode(req->rq_inode, &fattr);
+ if (result < PAGE_SIZE)
+ memset((u8 *) page_address(page)+result,
+ 0, PAGE_SIZE-result);
+ }
+ } else
+ if (result == -ETIMEDOUT && !(server->flags & NFS_MOUNT_SOFT)) {
+ /* XXX: Theoretically, we'd have to increment the initial
+ * timeo here; but I'm not going to bother with this now
+ * because this old nfsiod stuff will soon die anyway.
+ */
+ result = -EAGAIN;
+ }
+
+ if (result == -EAGAIN && req->rq_retries--) {
+ dprintk("BIO: retransmitting request.\n");
+ memset(&req->rq_rpcreq, 0, sizeof(struct rpc_ioreq));
+ while (rpc_reserve(server->rsock, &req->rq_rpcreq, 1) < 0)
+ schedule();
+ current->fsuid = req->rq_fsuid;
+ current->fsgid = req->rq_fsgid;
+ for (i = 0; i < NGROUPS; i++)
+ current->groups[i] = req->rq_groups[i];
+ nfsiod_read_setup(req);
+ return 0;
+ }
+ if (result >= 0) {
set_bit(PG_uptodate, &page->flags);
+ succ++;
} else {
- fail++;
dprintk("BIO: %d successful reads, %d failures\n", succ, fail);
set_bit(PG_error, &page->flags);
+ fail++;
}
clear_bit(PG_locked, &page->flags);
wake_up(&page->wait);
free_page(page_address(page));
+ return 1;
}

static inline int
do_read_nfs_async(struct inode *inode, struct page *page)
{
struct nfsiod_req *req;
- int result = -1; /* totally arbitrary */
+ int result, i;

dprintk("NFS: do_read_nfs_async(%p)\n", page);

set_bit(PG_locked, &page->flags);
clear_bit(PG_error, &page->flags);

- if (!(req = nfsiod_reserve(NFS_SERVER(inode), nfs_read_cb)))
- goto done;
- result = nfs_proc_read_request(&req->rq_rpcreq,
- NFS_SERVER(inode), NFS_FH(inode),
- page->offset, PAGE_SIZE,
- (__u32 *) page_address(page));
- if (result >= 0) {
- req->rq_cdata = page;
+ if (!(req = nfsiod_reserve(NFS_SERVER(inode))))
+ return -EAGAIN;
+
+ req->rq_retries = 5;
+ req->rq_callback = nfsiod_read_result;
+ req->rq_inode = inode;
+ req->rq_page = page;
+
+ req->rq_fsuid = current->fsuid;
+ req->rq_fsgid = current->fsgid;
+ for (i = 0; i < NGROUPS; i++)
+ req->rq_groups[i] = current->groups[i];
+
+ if ((result = nfsiod_read_setup(req)) >= 0) {
page->count++;
- result = nfsiod_enqueue(req);
- if (result >= 0)
- dprintk("NFS: enqueued async READ request.\n");
- }
- if (result < 0) {
+ nfsiod_enqueue(req);
+ } else {
dprintk("NFS: deferring async READ request.\n");
nfsiod_release(req);
clear_bit(PG_locked, &page->flags);
wake_up(&page->wait);
}

-done:
return result < 0? result : 0;
}

diff --unified --recursive --new-file --exclude-from exclude linux.vanilla/fs/nfs/dir.c linux/fs/nfs/dir.c
--- linux.vanilla/fs/nfs/dir.c Sun Jun 9 13:11:31 1996
+++ linux/fs/nfs/dir.c Mon Jun 17 20:26:26 1996
@@ -505,8 +505,7 @@
return -ENAMETOOLONG;
}
error = nfs_proc_rmdir(NFS_SERVER(dir), NFS_FH(dir), name);
- if (!error)
- nfs_lookup_cache_remove(dir, NULL, name);
+ nfs_lookup_cache_remove(dir, NULL, name);
iput(dir);
return error;
}
@@ -571,8 +570,7 @@
}
if ((error = nfs_sillyrename(dir, name, len)) < 0) {
error = nfs_proc_remove(NFS_SERVER(dir), NFS_FH(dir), name);
- if (!error)
- nfs_lookup_cache_remove(dir, NULL, name);
+ nfs_lookup_cache_remove(dir, NULL, name);
}
iput(dir);
return error;
@@ -630,8 +628,8 @@
}
error = nfs_proc_link(NFS_SERVER(oldinode), NFS_FH(oldinode),
NFS_FH(dir), name);
- if (!error)
- nfs_lookup_cache_remove(dir, oldinode, NULL);
+
+ nfs_lookup_cache_remove(dir, oldinode, NULL);
iput(oldinode);
iput(dir);
return error;
@@ -662,10 +660,9 @@
error = nfs_proc_rename(NFS_SERVER(old_dir),
NFS_FH(old_dir), old_name,
NFS_FH(new_dir), new_name);
- if (!error) {
- nfs_lookup_cache_remove(old_dir, NULL, old_name);
- nfs_lookup_cache_remove(new_dir, NULL, new_name);
- }
+
+ nfs_lookup_cache_remove(old_dir, NULL, old_name);
+ nfs_lookup_cache_remove(new_dir, NULL, new_name);
iput(old_dir);
iput(new_dir);
return error;
diff --unified --recursive --new-file --exclude-from exclude linux.vanilla/fs/nfs/file.c linux/fs/nfs/file.c
--- linux.vanilla/fs/nfs/file.c Sun Jun 9 13:11:31 1996
+++ linux/fs/nfs/file.c Mon Jun 17 20:26:26 1996
@@ -146,7 +146,9 @@
file->f_pos = pos;
if (pos > inode->i_size)
inode->i_size = pos;
- nfs_refresh_inode(inode, &fattr);
+ /* Avoid possible Solaris 2.5 nfsd bug */
+ if (inode->i_ino == fattr.fileid)
+ nfs_refresh_inode(inode, &fattr);
return written;
}

diff --unified --recursive --new-file --exclude-from exclude linux.vanilla/fs/nfs/nfsiod.c linux/fs/nfs/nfsiod.c
--- linux.vanilla/fs/nfs/nfsiod.c Sun Jun 9 13:11:31 1996
+++ linux/fs/nfs/nfsiod.c Mon Jun 17 20:26:26 1996
@@ -37,7 +37,7 @@
* Reserve an nfsiod slot and initialize the request struct
*/
struct nfsiod_req *
-nfsiod_reserve(struct nfs_server *server, nfsiod_done_fn_t callback)
+nfsiod_reserve(struct nfs_server *server)
{
struct nfsiod_req *req;

@@ -56,8 +56,6 @@
}

req->rq_server = server;
- req->rq_callback = callback;
-
return req;
}

@@ -74,21 +72,12 @@
/*
* Transmit a request and put it on nfsiod's list of pending requests.
*/
-int
+void
nfsiod_enqueue(struct nfsiod_req *req)
{
- int result;
-
dprintk("BIO: enqueuing request %p\n", &req->rq_rpcreq);
- result = rpc_transmit(req->rq_server->rsock, &req->rq_rpcreq);
- if (result < 0) {
- dprintk("BIO: rpc_transmit returned %d\n", result);
- } else {
- dprintk("BIO: waking up nfsiod (%p)\n", req->rq_wait);
- wake_up(&req->rq_wait);
- schedule();
- }
- return result;
+ wake_up(&req->rq_wait);
+ schedule();
}

/*
@@ -120,8 +109,10 @@
current->pid);
active++;
dprintk("BIO: before: now %d nfsiod's active\n", active);
- result = nfs_rpc_doio(req->rq_server, &req->rq_rpcreq, 1);
- req->rq_callback(result, req);
+ do {
+ result = nfs_rpc_doio(req->rq_server,
+ &req->rq_rpcreq, 1);
+ } while (!req->rq_callback(result, req));
active--;
}

diff --unified --recursive --new-file --exclude-from exclude linux.vanilla/fs/nfs/proc.c linux/fs/nfs/proc.c
--- linux.vanilla/fs/nfs/proc.c Sun Jun 9 13:11:31 1996
+++ linux/fs/nfs/proc.c Mon Jun 17 20:26:26 1996
@@ -451,13 +451,12 @@
req->rq_addr = &server->toaddr;
req->rq_alen = sizeof(server->toaddr);

- return 0;
+ return rpc_transmit(server->rsock, req);
}

int
-nfs_proc_read_reply(struct rpc_ioreq *req)
+nfs_proc_read_reply(struct rpc_ioreq *req, struct nfs_fattr *fattr)
{
- struct nfs_fattr fattr;
int status;
__u32 *p0, *p;
int count;
@@ -465,9 +464,11 @@
p0 = (__u32 *) req->rq_rvec[0].iov_base;

if (!(p = nfs_rpc_verify(p0))) {
- status = -errno_NFSERR_IO;
+ /* Tell the upper layers to retry */
+ status = -EAGAIN;
+ /* status = -errno_NFSERR_IO; */
} else if ((status = ntohl(*p++)) == NFS_OK) {
- p = xdr_decode_fattr(p, &fattr);
+ p = xdr_decode_fattr(p, fattr);
count = ntohl(*p++);
if (p != req->rq_rvec[2].iov_base) {
/* unexpected RPC reply header size. punt.
diff --unified --recursive --new-file --exclude-from exclude linux.vanilla/include/linux/nfs_fs.h linux/include/linux/nfs_fs.h
--- linux.vanilla/include/linux/nfs_fs.h Sun Jun 9 13:11:41 1996
+++ linux/include/linux/nfs_fs.h Tue Jun 18 00:15:11 1996
@@ -96,7 +96,7 @@
extern int nfs_proc_read_request(struct rpc_ioreq *, struct nfs_server *,
struct nfs_fh *, unsigned long offset,
unsigned long count, __u32 *buf);
-extern int nfs_proc_read_reply(struct rpc_ioreq *);
+extern int nfs_proc_read_reply(struct rpc_ioreq *, struct nfs_fattr *);
extern int *rpc_header(int *p, int procedure, int program, int version,
int uid, int gid, int *groups);
extern int *rpc_verify(int *p);
diff --unified --recursive --new-file --exclude-from exclude linux.vanilla/include/linux/nfsiod.h linux/include/linux/nfsiod.h
--- linux.vanilla/include/linux/nfsiod.h Sun Jun 9 13:11:45 1996
+++ linux/include/linux/nfsiod.h Tue Jun 18 00:27:44 1996
@@ -9,6 +9,7 @@
#define _LINUX_NFSIOD_H

#include <linux/rpcsock.h>
+#include <linux/nfs_fs.h>

#ifdef __KERNEL__

@@ -17,7 +18,7 @@
* Note that the callback procedure must NOT sleep.
*/
struct nfsiod_req;
-typedef void (*nfsiod_done_fn_t)(int result, struct nfsiod_req *);
+typedef int (*nfsiod_callback_t)(int result, struct nfsiod_req *);

/*
* This is the nfsiod request struct.
@@ -25,16 +26,25 @@
struct nfsiod_req {
struct nfsiod_req * rq_next;
struct nfsiod_req * rq_prev;
- struct nfs_server * rq_server;
struct wait_queue * rq_wait;
struct rpc_ioreq rq_rpcreq;
- nfsiod_done_fn_t rq_callback;
- void * rq_cdata;
+ nfsiod_callback_t rq_callback;
+ struct nfs_server * rq_server;
+ struct inode * rq_inode;
+ struct page * rq_page;
+
+ /* user creds */
+ uid_t rq_fsuid;
+ gid_t rq_fsgid;
+ int rq_groups[NGROUPS];
+
+ /* retry handling */
+ int rq_retries;
};

-struct nfsiod_req * nfsiod_reserve(struct nfs_server *, nfsiod_done_fn_t);
+struct nfsiod_req * nfsiod_reserve(struct nfs_server *);
void nfsiod_release(struct nfsiod_req *);
-int nfsiod_enqueue(struct nfsiod_req *);
+void nfsiod_enqueue(struct nfsiod_req *);
int nfsiod(void);


diff --unified --recursive --new-file --exclude-from exclude linux.vanilla/kernel/sched.c linux/kernel/sched.c
--- linux.vanilla/kernel/sched.c Sun Jun 9 13:11:37 1996
+++ linux/kernel/sched.c Sat Jun 22 13:39:39 1996
@@ -942,6 +942,12 @@
}
update_one_process(p, ticks, utime, stime);

+ if (p->priority < DEF_PRIORITY)
+ kstat.cpu_nice += utime;
+ else
+ kstat.cpu_user += utime;
+ kstat.cpu_system += stime;
+
p->counter -= ticks;
if (p->counter >= 0)
continue;
diff --unified --recursive --new-file --exclude-from exclude linux.vanilla/mm/vmscan.c linux/mm/vmscan.c
--- linux.vanilla/mm/vmscan.c Sun Jun 9 13:11:39 1996
+++ linux/mm/vmscan.c Thu Jun 20 20:40:59 1996
@@ -404,6 +404,7 @@
while (1) {
kswapd_awake = 0;
current->signal = 0;
+ run_task_queue(&tq_disk);
interruptible_sleep_on(&kswapd_wait);
kswapd_awake = 1;
swapstats.wakeups++;
diff --unified --recursive --new-file --exclude-from exclude linux.vanilla/net/ipv4/ip_fw.c linux/net/ipv4/ip_fw.c
--- linux.vanilla/net/ipv4/ip_fw.c Sun Jun 9 13:11:59 1996
+++ linux/net/ipv4/ip_fw.c Fri Jun 28 23:14:30 1996
@@ -264,9 +264,11 @@

/*
* Too short.
+ *
+ * But only too short for a packet with ports...
*/

- else if(ntohs(ip->tot_len)<8+(ip->ihl<<2))
+ else if((ntohs(ip->tot_len)<8+(ip->ihl<<2))&&(ip->protocol==IPPROTO_TCP || ip->protocol==IPPROTO_UDP))
return FW_BLOCK;

src = ip->saddr;
diff --unified --recursive --new-file --exclude-from exclude linux.vanilla/net/ipv4/ip_masq_app.c linux/net/ipv4/ip_masq_app.c
--- linux.vanilla/net/ipv4/ip_masq_app.c Sun Jun 9 13:12:01 1996
+++ linux/net/ipv4/ip_masq_app.c Thu Jun 20 20:41:33 1996
@@ -2,7 +2,7 @@
* IP_MASQ_APP application masquerading module
*
*
- * Version: @(#)ip_masq_app.c 0.03 03/96
+ * Version: @(#)ip_masq_app.c 0.04 96/06/17
*
* Author: Juan Jose Ciarlante, <jjciarla@raiz.uncu.edu.ar>
*
@@ -13,7 +13,8 @@
* 2 of the License, or (at your option) any later version.
*
* Fixes:
- * JJC : Implemented also input pkt hook
+ * JJC : Implemented also input pkt hook
+ * Miquel van Smoorenburg : Copy more stuff when resizing skb
*
*
* FIXME:
@@ -502,6 +503,7 @@
{
int maxsize, diff, o_offset;
struct sk_buff *n_skb;
+ int offset;

maxsize = skb->truesize - sizeof(struct sk_buff);

@@ -521,7 +523,9 @@
skb->end = skb->head+n_len;
} else {
/*
- * Sizes differ, make a copy
+ * Sizes differ, make a copy.
+ *
+ * FIXME: move this to core/sbuff.c:skb_grow()
*/

n_skb = alloc_skb(MAX_HEADER + skb->len + diff, pri);
@@ -534,8 +538,22 @@
n_skb->free = skb->free;
skb_reserve(n_skb, MAX_HEADER);
skb_put(n_skb, skb->len + diff);
- n_skb->h.raw = n_skb->data + (skb->h.raw - skb->data);
-
+
+ /*
+ * Copy as much data from the old skb as possible. Even
+ * though we're only forwarding packets, we need stuff
+ * like skb->protocol (PPP driver wants it).
+ */
+ offset = n_skb->data - skb->data;
+ n_skb->h.raw = skb->h.raw + offset;
+ n_skb->when = skb->when;
+ n_skb->dev = skb->dev;
+ n_skb->mac.raw = skb->mac.raw + offset;
+ n_skb->ip_hdr = (struct iphdr *)(((char *)skb->ip_hdr)+offset);
+ n_skb->pkt_type = skb->pkt_type;
+ n_skb->protocol = skb->protocol;
+ n_skb->ip_summed = skb->ip_summed;
+
/*
* Copy pkt in new buffer
*/
diff --unified --recursive --new-file --exclude-from exclude linux.vanilla/net/ipv4/ip_output.c linux/net/ipv4/ip_output.c
--- linux.vanilla/net/ipv4/ip_output.c Sun Jun 9 13:12:01 1996
+++ linux/net/ipv4/ip_output.c Fri Jun 28 23:29:55 1996
@@ -160,9 +160,9 @@
skb->dev = dev;
skb->arp = 1;
skb->protocol = htons(ETH_P_IP);
+ skb_reserve(skb,MAX_HEADER);
if (dev->hard_header)
{
- skb_reserve(skb,MAX_HEADER);
if (rt && dev == rt->rt_dev && rt->rt_hh)
{
memcpy(skb_push(skb,dev->hard_header_len),rt->rt_hh->hh_data,dev->hard_header_len);
@@ -574,7 +574,7 @@
ip_statistics.IpOutRequests++;

#ifdef CONFIG_IP_MULTICAST
- if(sk && MULTICAST(daddr) && *sk->ip_mc_name)
+ if(MULTICAST(daddr) && *sk->ip_mc_name)
{
dev=dev_get(sk->ip_mc_name);
if(!dev)
@@ -710,8 +710,10 @@
}
return 0;
}
- length -= sizeof(struct iphdr);
- if (sk && !sk->ip_hdrincl && opt)
+ if (!sk->ip_hdrincl)
+ length -= sizeof(struct iphdr);
+
+ if(opt)
{
length -= opt->optlen;
fragheaderlen = dev->hard_header_len + sizeof(struct iphdr) + opt->optlen;
diff --unified --recursive --new-file --exclude-from exclude linux.vanilla/net/ipv4/tcp.c linux/net/ipv4/tcp.c
--- linux.vanilla/net/ipv4/tcp.c Sun Jun 9 13:12:00 1996
+++ linux/net/ipv4/tcp.c Mon Jun 17 22:52:46 1996
@@ -551,8 +551,15 @@
if (rt->rt_mtu > new_mtu)
rt->rt_mtu = new_mtu;

+ /*
+ * FIXME::
+ * Not the nicest of fixes: Lose a MTU update if the socket is
+ * locked this instant. Not the right answer but will be best
+ * for the production fix. Make 2.1 work right!
+ */
+
if (sk->mtu > new_mtu - sizeof(struct iphdr) - sizeof(struct tcphdr)
- && new_mtu > sizeof(struct iphdr)+sizeof(struct tcphdr))
+ && new_mtu > sizeof(struct iphdr)+sizeof(struct tcphdr) && !sk->users)
sk->mtu = new_mtu - sizeof(struct iphdr) - sizeof(struct tcphdr);

return;
@@ -943,40 +950,44 @@
return -EPIPE;
}

- /*
- * The following code can result in copy <= if sk->mss is ever
- * decreased. It shouldn't be. sk->mss is min(sk->mtu, sk->max_window).
- * sk->mtu is constant once SYN processing is finished. I.e. we
- * had better not get here until we've seen his SYN and at least one
- * valid ack. (The SYN sets sk->mtu and the ack sets sk->max_window.)
- * But ESTABLISHED should guarantee that. sk->max_window is by definition
- * non-decreasing. Note that any ioctl to set user_mss must be done
- * before the exchange of SYN's. If the initial ack from the other
- * end has a window of 0, max_window and thus mss will both be 0.
- */
+ /*
+ * The following code can result in copy <= if sk->mss is ever
+ * decreased. It shouldn't be. sk->mss is min(sk->mtu, sk->max_window).
+ * sk->mtu is constant once SYN processing is finished. I.e. we
+ * had better not get here until we've seen his SYN and at least one
+ * valid ack. (The SYN sets sk->mtu and the ack sets sk->max_window.)
+ * But ESTABLISHED should guarantee that. sk->max_window is by definition
+ * non-decreasing. Note that any ioctl to set user_mss must be done
+ * before the exchange of SYN's. If the initial ack from the other
+ * end has a window of 0, max_window and thus mss will both be 0.
+ */

- /*
- * Now we need to check if we have a half built packet.
- */
+ /*
+ * Now we need to check if we have a half built packet.
+ */
#ifndef CONFIG_NO_PATH_MTU_DISCOVERY
- /*
- * FIXME: I'm almost sure that this fragment is BUG,
- * but it works... I do not know why 8) --ANK
- *
- * Really, we should rebuild all the queues...
- * It's difficult. Temporary hack is to send all
- * queued segments with allowed fragmentation.
- */
- {
- int new_mss = min(sk->mtu, sk->max_window);
- if (new_mss < sk->mss)
+ /*
+ * FIXME: I'm almost sure that this fragment is BUG,
+ * but it works... I do not know why 8) --ANK
+ *
+ * Really, we should rebuild all the queues...
+ * It's difficult. Temporary hack is to send all
+ * queued segments with allowed fragmentation.
+ */
{
- tcp_send_partial(sk);
- sk->mss = new_mss;
+ int new_mss = min(sk->mtu, sk->max_window);
+ if (new_mss < sk->mss)
+ {
+ tcp_send_partial(sk);
+ sk->mss = new_mss;
+ }
}
- }
#endif

+ /*
+ * If there is a partly filled frame we can fill
+ * out.
+ */
if ((skb = tcp_dequeue_partial(sk)) != NULL)
{
int tcp_size;
@@ -987,11 +998,33 @@
if (!(flags & MSG_OOB))
{
copy = min(sk->mss - tcp_size, seglen);
+
+ /*
+ * Now we may find the frame is as big, or too
+ * big for our MSS. Thats all fine. It means the
+ * MSS shrank (from an ICMP) after we allocated
+ * this frame.
+ */
+
if (copy <= 0)
{
- printk(KERN_CRIT "TCP: **bug**: \"copy\" <= 0\n");
- return -EFAULT;
+ /*
+ * Send the now forced complete frame out.
+ *
+ * Note for 2.1: The MSS reduce code ought to
+ * flush any frames in partial that are now
+ * full sized. Not serious, potential tiny
+ * performance hit.
+ */
+ tcp_send_skb(sk,skb);
+ /*
+ * Get a new buffer and try again.
+ */
+ continue;
}
+ /*
+ * Otherwise continue to fill the buffer.
+ */
tcp_size += copy;
memcpy_fromfs(skb_put(skb,copy), from, copy);
skb->csum = csum_partial(skb->tail - tcp_size, tcp_size, 0);
diff --unified --recursive --new-file --exclude-from exclude linux.vanilla/net/ipv4/tcp_output.c linux/net/ipv4/tcp_output.c
--- linux.vanilla/net/ipv4/tcp_output.c Sun Jun 9 13:12:01 1996
+++ linux/net/ipv4/tcp_output.c Wed Jun 26 23:35:41 1996
@@ -879,15 +879,22 @@
void tcp_send_delayed_ack(struct sock * sk, int max_timeout, unsigned long timeout)
{
unsigned long now;
+ static int delack_guard=0;

+ if(delack_guard)
+ return;
+
+ delack_guard++;
+
/* Calculate new timeout */
now = jiffies;
if (timeout > max_timeout)
timeout = max_timeout;
timeout += now;
if (sk->bytes_rcv >= sk->max_unacked) {
- timeout = now;
- mark_bh(TIMER_BH);
+ tcp_send_ack(sk);
+ delack_guard--;
+ return;
}

/* Use new timeout only if there wasn't a older one earlier */
@@ -896,6 +903,7 @@

sk->ack_backlog++;
add_timer(&sk->delack_timer);
+ delack_guard--;
}


diff --unified --recursive --new-file --exclude-from exclude linux.vanilla/net/socket.c linux/net/socket.c
--- linux.vanilla/net/socket.c Sun Jun 9 13:11:56 1996
+++ linux/net/socket.c Tue Jun 18 00:03:12 1996
@@ -778,7 +778,7 @@
sock_release(newsock);
return(-EINVAL);
}
- sock->file=current->files->fd[fd];
+ newsock->file=current->files->fd[fd];

if (upeer_sockaddr)
{