New version of the 2.1 syncookies patch

kdp0101@hpmail.lrz-muenchen.de
Wed, 04 Jun 1997 14:29:03 METDST


Hi,

Here is a new version of the syncookies patch for 2.1.42. It protects
Linux machines against SYN flood attacks.

This version fixes a few bugs and rough edges in my older patch. Please
test it and send me feedback. The patch also contains some sysctl
changes from Christopher Horn (I was too lazy to back them out ;)

Usage:
Apply the patch to a clean 2.1.42 kernel tree and build the kernel with
CONFIG_SYN_COOKIES enabled. Syncookies are off by default; turn them on with
    echo 1 >/proc/sys/net/ipv4/tcp_syncookies
somewhere in your startup files.

Changes:
- Added rate limiting (net_ratelimit) to the "martian source" kernel message
  and the related redirect / martian destination messages in route.c.
- Fixed a bug in tcp_ipv4.c: tcp_v4_do_rcv didn't pass the IP options
to the generic TCP code, so the correct source routing option was not
used when sending SYNACKs back. This fix makes the code compliant with
RFC 1122 section 4.2.3.8 again (not tested).
- Fixed a bug where the code didn't check the ACK bit before matching
a segment against the TCP open request list.
- Fixed the MSS list for very small MTUs (thanks to Eric Schenk).
- Added a help text to Configure.help (stolen from 2.0.30 ;)
- Some other small changes.

Enjoy,

-Andi Kleen

diff -x .* --recursive -u linux-clean/Documentation/Configure.help linux/Documentation/Configure.help
--- linux-clean/Documentation/Configure.help Sat May 31 14:09:56 1997
+++ linux/Documentation/Configure.help Tue Jun 3 16:57:00 1997
@@ -534,6 +534,19 @@
proxy server). Chances are that you should use this on every machine
being run as a router and not on any regular host. If unsure, say N.

+SYN flood protection
+CONFIG_SYN_COOKIES
+ Normal TCP/IP networking is open to an attack known as SYN flooding.
+ This attack prevents legitimate users from being able to connect to
+ your computer and requires very little work for the attacker.
+ SYN cookies provide protection against this type of attack. With
+ this option turned on the TCP/IP stack will use a cryptographic
+ challenge protocol known as SYN cookies to enable legitimate users
+ to continue to connect, even when your machine is under attack.
+ Note that SYN cookies are not enabled by default; you need to add
+ echo 1 >/proc/sys/net/ipv4/tcp_syncookies to one of your startup scripts
+ (e.g. /etc/rc.local or /etc/rc.d/rc.local).
+
Socket Security API Support (EXPERIMENTAL)
CONFIG_NET_SECURITY
Enable use of the socket security API. Note that Linux does not include
diff -x .* --recursive -u linux-clean/drivers/char/random.c linux/drivers/char/random.c
--- linux-clean/drivers/char/random.c Mon May 12 19:35:40 1997
+++ linux/drivers/char/random.c Wed May 28 21:56:28 1997
@@ -1431,7 +1431,7 @@
tmp[9]=daddr;
tmp[10]=(sport << 16) + dport;
halfMD4Transform(tmp, tmp+4);
-
+
/*
* As close as possible to RFC 793, which
* suggests using a 250kHz clock.
@@ -1447,6 +1447,63 @@
#endif
return (seq);
}
+
+#ifdef CONFIG_SYN_COOKIES
+/*
+ * Secure SYN cookie computation. This is the algorithm worked out by
+ * Dan Bernstein and Eric Schenk.
+ *
+ * saddr/daddr/sport/dport identify the connection, sseq is the peer's
+ * sequence number and count is the minute counter, which the caller is
+ * expected to derive from jiffies (it is only passed in here).
+ */
+__u32 secure_tcp_syn_cookie(__u32 saddr, __u32 daddr,
+ __u16 sport, __u16 dport, __u32 sseq, __u32 count)
+{
+ static int is_init = 0;
+ static __u32 secret[2][16];
+ __u32 tmp[16];
+ __u32 seq;
+
+ /*
+ * Pick two random secrets the first time this function is called.
+ */
+ if (is_init == 0) {
+ get_random_bytes(&secret[0], sizeof(secret[0]));
+ get_random_bytes(&secret[1], sizeof(secret[1]));
+ is_init = 1;
+ }
+
+ /*
+ * Compute the secure sequence number. The output should be:
+ * MD5(sec1,saddr,sport,daddr,dport,sec1) + their sequence number
+ * + (count * 2^24)
+ * + (MD5(sec2,saddr,sport,daddr,dport,count,sec2) % 2^24),
+ * where count increases every minute by 1. "MD5" here stands for
+ * HASH_TRANSFORM, whose definition is outside this hunk.
+ */
+
+ memcpy(tmp, secret[0], sizeof(tmp));
+ tmp[8]=saddr;
+ tmp[9]=daddr;
+ tmp[10]=(sport << 16) + dport;
+ HASH_TRANSFORM(tmp, tmp);
+ seq = tmp[1];
+
+ memcpy(tmp, secret[1], sizeof(tmp));
+ tmp[8]=saddr;
+ tmp[9]=daddr;
+ tmp[10]=(sport << 16) + dport;
+ tmp[11]=count; /* minute counter */
+ HASH_TRANSFORM(tmp, tmp);
+
+ seq += sseq + (count << 24) + (tmp[1] & 0x00ffffff);
+
+ /* Clears low 3 bits for the MSS; NOTE(review): 0xfffff8 also clears bits 24-31 (count) - confirm */
+ return (seq & 0xfffff8);
+}
+#endif
+

#ifdef RANDOM_BENCHMARK
/*
Only in linux/drivers/char: random.c~
diff -x .* --recursive -u linux-clean/include/linux/net.h linux/include/linux/net.h
--- linux-clean/include/linux/net.h Fri May 16 00:52:09 1997
+++ linux/include/linux/net.h Sun Jun 1 14:24:18 1997
@@ -136,5 +136,8 @@
extern int sock_recvmsg(struct socket *, struct msghdr *m, int len, int flags);
extern int sock_readv_writev(int type, struct inode * inode, struct file * file,
const struct iovec * iov, long count, long size);
+
+int net_ratelimit(void);
+
#endif /* __KERNEL__ */
#endif /* _LINUX_NET_H */
diff -x .* --recursive -u linux-clean/include/linux/random.h linux/include/linux/random.h
--- linux-clean/include/linux/random.h Thu Jun 6 12:42:15 1996
+++ linux/include/linux/random.h Wed May 28 16:50:14 1997
@@ -55,6 +55,8 @@

extern __u32 secure_tcp_sequence_number(__u32 saddr, __u32 daddr,
__u16 sport, __u16 dport);
+extern __u32 secure_tcp_syn_cookie(__u32 saddr, __u32 daddr,
+ __u16 sport, __u16 dport, __u32 sseq, __u32 count);

#ifndef MODULE
extern struct file_operations random_fops, urandom_fops;
diff -x .* --recursive -u linux-clean/include/linux/sysctl.h linux/include/linux/sysctl.h
--- linux-clean/include/linux/sysctl.h Sat May 31 14:10:06 1997
+++ linux/include/linux/sysctl.h Sat May 31 12:22:48 1997
@@ -75,7 +75,6 @@
enum
{
VM_SWAPCTL=1, /* struct: Set vm swapping control */
- VM_KSWAPD, /* struct: control background pageout */
VM_SWAPOUT, /* int: Background pageout interval */
VM_FREEPG, /* struct: Set free page thresholds */
VM_BDFLUSH, /* struct: Control buffer cache flushing */
@@ -110,6 +109,7 @@
NET_CORE_RMEM_MAX,
NET_CORE_WMEM_DEFAULT,
NET_CORE_RMEM_DEFAULT,
+ NET_CORE_DESTROY_DELAY,
};

/* /proc/sys/net/ethernet */
@@ -118,12 +118,19 @@

/* /proc/sys/net/unix */

+enum
+{
+ NET_UNIX_DESTROY_DELAY=1,
+ NET_UNIX_DELETE_DELAY,
+};
+
/* /proc/sys/net/ipv4 */
enum
{
NET_IPV4_ARP_RES_TIME=1,
NET_IPV4_ARP_DEAD_RES_TIME,
NET_IPV4_ARP_MAX_TRIES,
+ NET_IPV4_ARP_MAX_PINGS,
NET_IPV4_ARP_TIMEOUT,
NET_IPV4_ARP_CHECK_INTERVAL,
NET_IPV4_ARP_CONFIRM_INTERVAL,
@@ -147,9 +154,22 @@
NET_IPV4_ACCEPT_REDIRECTS,
NET_IPV4_SECURE_REDIRECTS,
NET_IPV4_RFC1620_REDIRECTS,
- NET_TCP_SYN_RETRIES,
- NET_IPFRAG_HIGH_THRESH,
- NET_IPFRAG_LOW_THRESH,
+ NET_IPV4_TCP_SYN_RETRIES,
+ NET_IPV4_IPFRAG_HIGH_THRESH,
+ NET_IPV4_IPFRAG_LOW_THRESH,
+ NET_IPV4_IPFRAG_TIME,
+ NET_IPV4_TCP_MAX_KA_PROBES,
+ NET_IPV4_TCP_KEEPALIVE_TIME,
+ NET_IPV4_TCP_KEEPALIVE_PROBES,
+ NET_IPV4_TCP_RETRIES1,
+ NET_IPV4_TCP_RETRIES2,
+ NET_IPV4_TCP_MAX_DELAY_ACKS,
+ NET_IPV4_TCP_FIN_TIMEOUT,
+ NET_IPV4_IGMP_MAX_HOST_REPORT_DELAY,
+ NET_IPV4_IGMP_TIMER_SCALE,
+ NET_IPV4_IGMP_AGE_THRESHOLD,
+ NET_TCP_SYNCOOKIES,
+ NET_TCP_ALWAYS_SYNCOOKIE,
};


@@ -269,6 +289,8 @@
void *, size_t *);
extern int proc_dointvec_minmax(ctl_table *, int, struct file *,
void *, size_t *);
+extern int proc_dointvec_jiffies(ctl_table *, int, struct file *,
+ void *, size_t *);

extern int do_sysctl (int *name, int nlen,
void *oldval, size_t *oldlenp,
diff -x .* --recursive -u linux-clean/include/net/tcp.h linux/include/net/tcp.h
--- linux-clean/include/net/tcp.h Tue May 27 13:52:55 1997
+++ linux/include/net/tcp.h Sun Jun 1 14:27:55 1997
@@ -281,15 +281,17 @@

int (*conn_request) (struct sock *sk,
struct sk_buff *skb,
- void *opt,
- __u32 isn);
+ void *opt, __u32 isn);

struct sock * (*syn_recv_sock) (struct sock *sk,
struct sk_buff *skb,
- struct open_request *req);
+ struct open_request *req,
+ struct dst_entry *dst);

+#if 0
__u32 (*init_sequence) (struct sock *sk,
struct sk_buff *skb);
+#endif

struct sock * (*get_sock) (struct sk_buff *skb,
struct tcphdr *th);
@@ -385,7 +387,8 @@
int len, int nonblock,
int flags, int *addr_len);

-extern void tcp_parse_options(struct tcphdr *th, struct tcp_opt *tp);
+extern void tcp_parse_options(struct tcphdr *th, struct tcp_opt *tp,
+ int no_fancy);

/*
* TCP v4 functions exported for the inet6 API
@@ -407,7 +410,8 @@

extern struct sock * tcp_v4_syn_recv_sock(struct sock *sk,
struct sk_buff *skb,
- struct open_request *req);
+ struct open_request *req,
+ struct dst_entry *dst);

extern int tcp_v4_do_rcv(struct sock *sk,
struct sk_buff *skb);
@@ -417,6 +421,12 @@
int addr_len);


+/* From syncookies.c */
+extern struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
+ struct ip_options *opt);
+extern __u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb,
+ __u16 *mss);
+
extern void tcp_read_wakeup(struct sock *);
extern void tcp_write_xmit(struct sock *);
extern void tcp_time_wait(struct sock *);
@@ -521,7 +531,6 @@
{
return csum_tcpudp_magic(saddr,daddr,len,IPPROTO_TCP,base);
}
-

#undef STATE_TRACE

Only in linux/include/net: tcp.h~
diff -x .* --recursive -u linux-clean/kernel/sysctl.c linux/kernel/sysctl.c
--- linux-clean/kernel/sysctl.c Mon May 12 19:35:44 1997
+++ linux/kernel/sysctl.c Fri May 30 13:05:32 1997
@@ -37,9 +37,7 @@

/* External variables not in a header file. */
extern int panic_timeout;
-extern int console_loglevel, default_message_loglevel;
-extern int minimum_console_loglevel, default_console_loglevel;
-extern int C_A_D, swapout_interval;
+extern int console_loglevel, C_A_D, swapout_interval;
extern int bdf_prm[], bdflush_min[], bdflush_max[];
extern char binfmt_java_interpreter[], binfmt_java_appletviewer[];
extern int sysctl_overcommit_memory;
@@ -184,6 +182,8 @@
static ctl_table vm_table[] = {
{VM_SWAPCTL, "swapctl",
&swap_control, sizeof(swap_control_t), 0600, NULL, &proc_dointvec},
+ {VM_SWAPOUT, "swapout_interval",
+ &swapout_interval, sizeof(int), 0600, NULL, &proc_dointvec_jiffies},
{VM_FREEPG, "freepages",
&min_free_pages, 3*sizeof(int), 0600, NULL, &proc_dointvec},
{VM_BDFLUSH, "bdflush", &bdf_prm, 9*sizeof(int), 0600, NULL,
@@ -611,8 +611,8 @@
return 0;
}

-int proc_dointvec(ctl_table *table, int write, struct file *filp,
- void *buffer, size_t *lenp)
+static int do_proc_dointvec(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp, int conv)
{
int *i, vleft, first=1, len, left, neg, val;
#define TMPBUFLEN 20
@@ -655,7 +655,7 @@
}
if (*p < '0' || *p > '9')
break;
- val = simple_strtoul(p, &p, 0);
+ val = simple_strtoul(p, &p, 0) * conv;
len = p-buf;
if ((len < left) && *p && !isspace(*p))
break;
@@ -668,7 +668,7 @@
p = buf;
if (!first)
*p++ = '\t';
- sprintf(p, "%d", *i);
+ sprintf(p, "%d", (*i) / conv);
len = strlen(buf);
if (len > left)
len = left;
@@ -702,6 +702,12 @@
return 0;
}

+int proc_dointvec(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return do_proc_dointvec(table,write,filp,buffer,lenp,1);
+}
+
int proc_dointvec_minmax(ctl_table *table, int write, struct file *filp,
void *buffer, size_t *lenp)
{
@@ -798,6 +804,13 @@
*lenp -= left;
filp->f_pos += *lenp;
return 0;
+}
+
+/* Like proc_dointvec, but converts seconds to jiffies */
+int proc_dointvec_jiffies(ctl_table *table, int write, struct file *filp,
+ void *buffer, size_t *lenp)
+{
+ return do_proc_dointvec(table,write,filp,buffer,lenp,HZ);
}

#else /* CONFIG_PROC_FS */
Only in linux/kernel: sysctl.c~
diff -x .* --recursive -u linux-clean/net/core/sock.c linux/net/core/sock.c
--- linux-clean/net/core/sock.c Mon May 12 19:35:44 1997
+++ linux/net/core/sock.c Thu May 29 22:09:46 1997
@@ -124,6 +124,8 @@
__u32 sysctl_wmem_default = SK_WMEM_MAX;
__u32 sysctl_rmem_default = SK_RMEM_MAX;

+int sysctl_core_destroy_delay = SOCK_DESTROY_TIME;
+
/*
* This is meant for all protocols to use and covers goings on
* at the socket level. Everything here is generic.
@@ -787,7 +789,7 @@
* Someone is using our buffers still.. defer
*/
init_timer(&sk->timer);
- sk->timer.expires=jiffies+10*HZ;
+ sk->timer.expires=jiffies+sysctl_core_destroy_delay;
sk->timer.function=sklist_destroy_timer;
sk->timer.data = (unsigned long)sk;
add_timer(&sk->timer);
diff -x .* --recursive -u linux-clean/net/core/sysctl_net_core.c linux/net/core/sysctl_net_core.c
--- linux-clean/net/core/sysctl_net_core.c Thu May 15 03:21:31 1997
+++ linux/net/core/sysctl_net_core.c Thu May 29 22:20:03 1997
@@ -13,6 +13,8 @@
extern __u32 sysctl_wmem_default;
extern __u32 sysctl_rmem_default;

+extern int sysctl_core_destroy_delay;
+
ctl_table core_table[] = {
{NET_CORE_WMEM_MAX, "wmem_max",
&sysctl_wmem_max, sizeof(int), 0644, NULL,
@@ -26,5 +28,8 @@
{NET_CORE_RMEM_DEFAULT, "rmem_default",
&sysctl_rmem_default, sizeof(int), 0644, NULL,
&proc_dointvec},
+ {NET_CORE_DESTROY_DELAY, "destroy_delay",
+ &sysctl_core_destroy_delay, sizeof(int), 0644, NULL,
+ &proc_dointvec_jiffies},
{ 0 }
};
Only in linux/net/core: sysctl_net_core.c~
diff -x .* --recursive -u linux-clean/net/ipv4/Config.in linux/net/ipv4/Config.in
--- linux-clean/net/ipv4/Config.in Fri Feb 7 14:54:55 1997
+++ linux/net/ipv4/Config.in Sun Jun 1 14:18:47 1997
@@ -31,6 +31,7 @@
bool 'IP: ARP daemon support (EXPERIMENTAL)' CONFIG_ARPD
fi
fi
+bool 'IP: TCP syncookie support (not enabled per default) ' CONFIG_SYN_COOKIES
comment '(it is safe to leave these untouched)'
bool 'IP: PC/TCP compatibility mode' CONFIG_INET_PCTCP
tristate 'IP: Reverse ARP' CONFIG_INET_RARP
Only in linux/net/ipv4: Config.in~
diff -x .* --recursive -u linux-clean/net/ipv4/Makefile linux/net/ipv4/Makefile
--- linux-clean/net/ipv4/Makefile Fri Mar 21 03:17:13 1997
+++ linux/net/ipv4/Makefile Tue May 27 22:40:14 1997
@@ -52,6 +52,11 @@
endif
endif

+ifeq ($(CONFIG_SYN_COOKIES),y)
+IPV4_OBJS += syncookies.o
+# module not supported, because it would be too messy.
+endif
+
ifdef CONFIG_INET
O_OBJS := $(IPV4_OBJS)
OX_OBJS := $(IPV4X_OBJS)
Only in linux/net/ipv4: Makefile~
diff -x .* --recursive -u linux-clean/net/ipv4/af_inet.c linux/net/ipv4/af_inet.c
--- linux-clean/net/ipv4/af_inet.c Mon May 12 19:35:44 1997
+++ linux/net/ipv4/af_inet.c Mon Jun 2 20:35:27 1997
@@ -114,6 +114,7 @@

#define min(a,b) ((a)<(b)?(a):(b))

+extern int sysctl_core_destroy_delay;
extern struct proto packet_prot;
extern int raw_get_info(char *, char **, off_t, int, int);
extern int snmp_get_info(char *, char **, off_t, int, int);
@@ -190,7 +191,7 @@
sk->destroy = 1;
sk->ack_backlog = 0;
release_sock(sk);
- net_reset_timer(sk, TIME_DESTROY, SOCK_DESTROY_TIME);
+ net_reset_timer(sk, TIME_DESTROY, sysctl_core_destroy_delay);
}

void destroy_sock(struct sock *sk)
Only in linux/net/ipv4: af_inet.c~
diff -x .* --recursive -u linux-clean/net/ipv4/igmp.c linux/net/ipv4/igmp.c
--- linux-clean/net/ipv4/igmp.c Mon Mar 31 22:52:32 1997
+++ linux/net/ipv4/igmp.c Thu May 29 22:09:47 1997
@@ -88,6 +88,9 @@
#include <linux/igmp.h>
#include <net/checksum.h>

+int sysctl_igmp_max_host_report_delay = IGMP_MAX_HOST_REPORT_DELAY;
+int sysctl_igmp_timer_scale = IGMP_TIMER_SCALE;
+int sysctl_igmp_age_threshold = IGMP_AGE_THRESHOLD;

/*
* If time expired, change the router type to IGMP_NEW_ROUTER.
@@ -133,7 +136,7 @@
return NULL;
i->dev = dev;
i->type = IGMP_NEW_ROUTER;
- i->time = IGMP_AGE_THRESHOLD;
+ i->time = sysctl_igmp_age_threshold;
i->next = ip_router_info_head;
ip_router_info_head = i;

@@ -229,7 +232,7 @@
int tv;
if(im->tm_running)
return;
- tv=random()%(max_resp_time*HZ/IGMP_TIMER_SCALE); /* Pick a number any number 8) */
+ tv=random()%(max_resp_time*HZ/sysctl_igmp_timer_scale); /* Pick a number any number 8) */
im->timer.expires=jiffies+tv;
im->tm_running=1;
add_timer(&im->timer);
@@ -363,7 +366,7 @@
if (group && group != im->multiaddr)
continue;
if(im->tm_running) {
- if(im->timer.expires>jiffies+max_resp_time*HZ/IGMP_TIMER_SCALE) {
+ if(im->timer.expires>jiffies+max_resp_time*HZ/sysctl_igmp_timer_scale) {
igmp_stop_timer(im);
igmp_start_timer(im,max_resp_time);
}
@@ -372,9 +375,9 @@
}
} else {
mrouter_type=IGMP_OLD_ROUTER;
- max_resp_time=IGMP_MAX_HOST_REPORT_DELAY*IGMP_TIMER_SCALE;
+ max_resp_time=sysctl_igmp_max_host_report_delay*sysctl_igmp_timer_scale;

- if(igmp_set_mrouter_info(dev,mrouter_type,IGMP_AGE_THRESHOLD)==NULL)
+ if(igmp_set_mrouter_info(dev,mrouter_type,sysctl_igmp_age_threshold)==NULL)
return;

/*
diff -x .* --recursive -u linux-clean/net/ipv4/ip_fragment.c linux/net/ipv4/ip_fragment.c
--- linux-clean/net/ipv4/ip_fragment.c Tue May 27 13:47:54 1997
+++ linux/net/ipv4/ip_fragment.c Thu May 29 22:09:47 1997
@@ -41,6 +41,8 @@
int sysctl_ipfrag_high_thresh = 256*1024;
int sysctl_ipfrag_low_thresh = 192*1024;

+int sysctl_ipfrag_time = IP_FRAG_TIME;
+
/* Describe an IP fragment. */
struct ipfrag {
int offset; /* offset of fragment in IP datagram */
@@ -251,7 +253,7 @@
qp->dev = skb->dev;

/* Start a timer for this entry. */
- qp->timer.expires = jiffies + IP_FRAG_TIME; /* about 30 seconds */
+ qp->timer.expires = jiffies + sysctl_ipfrag_time; /* about 30 seconds */
qp->timer.data = (unsigned long) qp; /* pointer to queue */
qp->timer.function = ip_expire; /* expire function */
add_timer(&qp->timer);
@@ -417,7 +419,7 @@
memcpy(qp->iph, iph, ihl+8);
}
del_timer(&qp->timer);
- qp->timer.expires = jiffies + IP_FRAG_TIME; /* about 30 seconds */
+ qp->timer.expires = jiffies + sysctl_ipfrag_time; /* about 30 seconds */
qp->timer.data = (unsigned long) qp; /* pointer to queue */
qp->timer.function = ip_expire; /* expire function */
add_timer(&qp->timer);
diff -x .* --recursive -u linux-clean/net/ipv4/route.c linux/net/ipv4/route.c
--- linux-clean/net/ipv4/route.c Mon May 12 19:35:44 1997
+++ linux/net/ipv4/route.c Sun Jun 1 14:23:25 1997
@@ -45,6 +45,7 @@
* Pavel Krauz : Limited broadcast fixed
* Alexey Kuznetsov : End of old history. Splitted to fib.c and
* route.c and rewritten from scratch.
+ * Andi Kleen : Load-limit warning messages.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -568,7 +569,7 @@
return;

reject_redirect:
- if (ipv4_config.log_martians)
+ if (ipv4_config.log_martians && net_ratelimit())
printk(KERN_INFO "Redirect from %lX/%s to %lX ignored."
"Path = %lX -> %lX, tos %02x\n",
ntohl(old_gw), dev->name, ntohl(new_gw),
@@ -636,7 +637,7 @@
if (jiffies - rt->last_error > (RT_REDIRECT_LOAD<<rt->errors)) {
icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway);
rt->last_error = jiffies;
- if (ipv4_config.log_martians && ++rt->errors == RT_REDIRECT_NUMBER)
+ if (ipv4_config.log_martians && ++rt->errors == RT_REDIRECT_NUMBER && net_ratelimit())
printk(KERN_WARNING "host %08x/%s ignores redirects for %08x to %08x.\n",
rt->rt_src, rt->rt_src_dev->name, rt->rt_dst, rt->rt_gateway);
}
@@ -1083,12 +1084,12 @@
* Do not cache martian addresses: they should be logged (RFC1812)
*/
martian_destination:
- if (ipv4_config.log_martians)
+ if (ipv4_config.log_martians && net_ratelimit())
printk(KERN_WARNING "martian destination %08x from %08x, dev %s\n", daddr, saddr, dev->name);
return -EINVAL;

martian_source:
- if (ipv4_config.log_martians) {
+ if (ipv4_config.log_martians && net_ratelimit()) {
/*
* RFC1812 recommenadtion, if source is martian,
* the only hint is MAC header.
diff -x .* --recursive -u linux-clean/net/ipv4/sysctl_net_ipv4.c linux/net/ipv4/sysctl_net_ipv4.c
--- linux-clean/net/ipv4/sysctl_net_ipv4.c Tue May 27 13:47:54 1997
+++ linux/net/ipv4/sysctl_net_ipv4.c Sat May 31 16:32:47 1997
@@ -34,10 +34,17 @@
extern int sysctl_arp_check_interval;
extern int sysctl_arp_confirm_interval;
extern int sysctl_arp_confirm_timeout;
+extern int sysctl_arp_max_pings;

/* From ip_fragment.c */
extern int sysctl_ipfrag_low_thresh;
extern int sysctl_ipfrag_high_thresh;
+extern int sysctl_ipfrag_time;
+
+/* From igmp.c */
+extern int sysctl_igmp_max_host_report_delay;
+extern int sysctl_igmp_timer_scale;
+extern int sysctl_igmp_age_threshold;

extern int sysctl_tcp_cong_avoidance;
extern int sysctl_tcp_hoe_retransmits;
@@ -45,7 +52,16 @@
extern int sysctl_tcp_tsack;
extern int sysctl_tcp_timestamps;
extern int sysctl_tcp_window_scaling;
-extern int sysctl_syn_retries;
+extern int sysctl_tcp_keepalive_time;
+extern int sysctl_tcp_keepalive_probes;
+extern int sysctl_tcp_max_ka_probes;
+extern int sysctl_tcp_retries1;
+extern int sysctl_tcp_retries2;
+extern int sysctl_tcp_max_delay_acks;
+extern int sysctl_tcp_fin_timeout;
+extern int sysctl_tcp_syncookies;
+extern int sysctl_tcp_always_syncookie;
+extern int sysctl_tcp_syn_retries;

extern int tcp_sysctl_congavoid(ctl_table *ctl, int write, struct file * filp,
void *buffer, size_t *lenp);
@@ -82,6 +98,8 @@
&sysctl_arp_dead_res_time, sizeof(int), 0644, NULL, &proc_dointvec},
{NET_IPV4_ARP_MAX_TRIES, "arp_max_tries",
&sysctl_arp_max_tries, sizeof(int), 0644, NULL, &proc_dointvec},
+ {NET_IPV4_ARP_MAX_PINGS, "arp_max_pings",
+ &sysctl_arp_max_pings, sizeof(int), 0644, NULL, &proc_dointvec},
{NET_IPV4_ARP_TIMEOUT, "arp_timeout",
&sysctl_arp_timeout, sizeof(int), 0644, NULL, &proc_dointvec},
{NET_IPV4_ARP_CHECK_INTERVAL, "arp_check_interval",
@@ -149,12 +167,48 @@
{NET_IPV4_RFC1620_REDIRECTS, "ip_rfc1620_redirects",
&ipv4_config.rfc1620_redirects, sizeof(int), 0644, NULL,
&proc_dointvec},
- {NET_TCP_SYN_RETRIES, "tcp_syn_retries",
- &sysctl_syn_retries, sizeof(int), 0644, NULL, &proc_dointvec},
- {NET_IPFRAG_HIGH_THRESH, "ipfrag_high_thresh",
- &sysctl_ipfrag_high_thresh, sizeof(int), 0644, NULL, &proc_dointvec},
- {NET_IPFRAG_LOW_THRESH, "ipfrag_low_thresh",
- &sysctl_ipfrag_low_thresh, sizeof(int), 0644, NULL, &proc_dointvec},
+ {NET_IPV4_TCP_SYN_RETRIES, "tcp_syn_retries",
+ &sysctl_tcp_syn_retries, sizeof(int), 0644, NULL, &proc_dointvec},
+ {NET_IPV4_IPFRAG_HIGH_THRESH, "ipfrag_high_thresh",
+ &sysctl_ipfrag_high_thresh, sizeof(int), 0644, NULL, &proc_dointvec},
+ {NET_IPV4_IPFRAG_LOW_THRESH, "ipfrag_low_thresh",
+ &sysctl_ipfrag_low_thresh, sizeof(int), 0644, NULL, &proc_dointvec},
+ {NET_IPV4_IPFRAG_TIME, "ipfrag_time",
+ &sysctl_ipfrag_time, sizeof(int), 0644, NULL, &proc_dointvec_jiffies},
+ {NET_IPV4_TCP_MAX_KA_PROBES, "tcp_max_ka_probes",
+ &sysctl_tcp_max_ka_probes, sizeof(int), 0644, NULL, &proc_dointvec},
+ {NET_IPV4_TCP_KEEPALIVE_TIME, "tcp_keepalive_time",
+ &sysctl_tcp_keepalive_time, sizeof(int), 0644, NULL,
+ &proc_dointvec_jiffies},
+ {NET_IPV4_TCP_KEEPALIVE_PROBES, "tcp_keepalive_probes",
+ &sysctl_tcp_keepalive_probes, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_IPV4_TCP_RETRIES1, "tcp_retries1",
+ &sysctl_tcp_retries1, sizeof(int), 0644, NULL, &proc_dointvec},
+ {NET_IPV4_TCP_RETRIES2, "tcp_retries2",
+ &sysctl_tcp_retries2, sizeof(int), 0644, NULL, &proc_dointvec},
+ {NET_IPV4_TCP_MAX_DELAY_ACKS, "tcp_max_delay_acks",
+ &sysctl_tcp_max_delay_acks, sizeof(int), 0644, NULL, &proc_dointvec},
+ {NET_IPV4_TCP_FIN_TIMEOUT, "tcp_fin_timeout",
+ &sysctl_tcp_fin_timeout, sizeof(int), 0644, NULL,
+ &proc_dointvec_jiffies},
+ {NET_IPV4_IGMP_MAX_HOST_REPORT_DELAY, "igmp_max_host_report_delay",
+ &sysctl_igmp_max_host_report_delay, sizeof(int), 0644, NULL,
+ &proc_dointvec},
+ {NET_IPV4_IGMP_TIMER_SCALE, "igmp_timer_scale",
+ &sysctl_igmp_timer_scale, sizeof(int), 0644, NULL, &proc_dointvec},
+#if 0
+ /* This one shouldn't be exposed to the user (too implementation
+ specific): */
+ {NET_IPV4_IGMP_AGE_THRESHOLD, "igmp_age_threshold",
+ &sysctl_igmp_age_threshold, sizeof(int), 0644, NULL, &proc_dointvec},
+#endif
+#ifdef CONFIG_SYN_COOKIES
+ {NET_TCP_SYNCOOKIES, "tcp_syncookies",
+ &sysctl_tcp_syncookies, sizeof(int), 0644, NULL, &proc_dointvec},
+ {NET_TCP_ALWAYS_SYNCOOKIE, "tcp_always_syncookie",
+ &sysctl_tcp_always_syncookie, sizeof(int), 0644, NULL, &proc_dointvec},
+#endif
{0}
};

Only in linux/net/ipv4: sysctl_net_ipv4.c~
diff -x .* --recursive -u linux-clean/net/ipv4/tcp.c linux/net/ipv4/tcp.c
--- linux-clean/net/ipv4/tcp.c Thu May 15 00:01:21 1997
+++ linux/net/ipv4/tcp.c Thu May 29 22:09:47 1997
@@ -431,6 +431,8 @@

#include <asm/uaccess.h>

+int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT;
+
unsigned long seq_offset;
struct tcp_mib tcp_statistics;

@@ -1385,7 +1387,7 @@
if(timer_active)
add_timer(&sk->timer);
else
- tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_FIN_TIMEOUT);
+ tcp_reset_msl_timer(sk, TIME_CLOSE, sysctl_tcp_fin_timeout);
}

return send_fin;
@@ -1499,7 +1501,7 @@
if(timer_active)
add_timer(&sk->timer);
else
- tcp_reset_msl_timer(sk, TIME_CLOSE, TCP_FIN_TIMEOUT);
+ tcp_reset_msl_timer(sk, TIME_CLOSE, sysctl_tcp_fin_timeout);
}

sk->dead = 1;
diff -x .* --recursive -u linux-clean/net/ipv4/tcp_input.c linux/net/ipv4/tcp_input.c
--- linux-clean/net/ipv4/tcp_input.c Mon May 12 19:35:44 1997
+++ linux/net/ipv4/tcp_input.c Sat May 31 21:09:00 1997
@@ -56,13 +56,15 @@
static void tcp_cong_avoid_vegas(struct sock *sk, u32 seq, u32 ack,
u32 seq_rtt);

-int sysctl_tcp_cong_avoidance = 0;
-int sysctl_tcp_hoe_retransmits = 0;
-int sysctl_tcp_sack = 0;
-int sysctl_tcp_tsack = 0;
-int sysctl_tcp_timestamps = 0;
-int sysctl_tcp_window_scaling = 0;
-
+int sysctl_tcp_cong_avoidance;
+int sysctl_tcp_hoe_retransmits;
+int sysctl_tcp_sack;
+int sysctl_tcp_tsack;
+int sysctl_tcp_timestamps;
+int sysctl_tcp_window_scaling;
+int sysctl_tcp_syncookies;
+int sysctl_tcp_always_syncookie;
+int sysctl_tcp_max_delay_acks = MAX_DELAY_ACK;

static tcp_sys_cong_ctl_t tcp_sys_cong_ctl_f = &tcp_cong_avoid_vanj;

@@ -286,7 +288,7 @@
* FIXME: surely this can be more efficient. -- erics
*/

-void tcp_parse_options(struct tcphdr *th, struct tcp_opt *tp)
+void tcp_parse_options(struct tcphdr *th, struct tcp_opt *tp, int no_fancy)
{
unsigned char *ptr;
int length=(th->doff*4)-sizeof(struct tcphdr);
@@ -321,21 +323,21 @@
break;
case TCPOPT_WINDOW:
if(opsize==TCPOLEN_WINDOW && th->syn)
- if (sysctl_tcp_window_scaling) {
+ if (!no_fancy && sysctl_tcp_window_scaling) {
tp->wscale_ok = 1;
tp->snd_wscale = *(__u8 *)ptr;
}
break;
case TCPOPT_SACK_PERM:
if(opsize==TCPOLEN_SACK_PERM && th->syn)
- if (sysctl_tcp_sack)
+ if (sysctl_tcp_sack && !no_fancy)
tp->sack_ok = 1;
case TCPOPT_TIMESTAMP:
if(opsize==TCPOLEN_TIMESTAMP) {
/* Cheaper to set again then to
* test syn. Optimize this?
*/
- if (sysctl_tcp_timestamps)
+ if (sysctl_tcp_timestamps && !no_fancy)
tp->tstamp_ok = 1;
tp->saw_tstamp = 1;
tp->rcv_tsval = ntohl(*(__u32 *)ptr);
@@ -343,6 +345,8 @@
}
break;
case TCPOPT_SACK:
+ if (no_fancy)
+ break;
tp->sacks = (opsize-2)>>3;
if (tp->sacks<<3 == opsize-2) {
int i;
@@ -383,7 +387,7 @@
return 1;
}
}
- tcp_parse_options(th,tp);
+ tcp_parse_options(th,tp,0);
return 1;
}

@@ -1080,7 +1084,7 @@
/* A retransmit, 2nd most common case. Force an imediate ack. */
SOCK_DEBUG(sk, "retransmit received: seq %X\n", skb->seq);

- tp->delayed_acks = MAX_DELAY_ACK;
+ tp->delayed_acks = sysctl_tcp_max_delay_acks;
kfree_skb(skb, FREE_READ);
return;
}
@@ -1094,7 +1098,7 @@
}

/* Ok. This is an out_of_order segment, force an ack. */
- tp->delayed_acks = MAX_DELAY_ACK;
+ tp->delayed_acks = sysctl_tcp_max_delay_acks;

/* Disable header predition. */
tp->pred_flags = 0;
@@ -1218,7 +1222,7 @@
return;
}

- if (tp->delayed_acks >= MAX_DELAY_ACK || tcp_raise_window(sk))
+ if (tp->delayed_acks >= sysctl_tcp_max_delay_acks || tcp_raise_window(sk))
tcp_send_ack(sk);
else
tcp_send_delayed_ack(sk, HZ/2);
@@ -1457,13 +1461,11 @@
/* These use the socket TOS..
* might want to be the received TOS
*/
- if(th->ack)
+ if(th->ack)
return 1; /* send reset */

if(th->syn) {
- __u32 isn = tp->af_specific->init_sequence(sk, skb);
-
- if(tp->af_specific->conn_request(sk, skb, opt, isn) < 0)
+ if(tp->af_specific->conn_request(sk, skb, opt, 0) < 0)
return 1;

/* Now we have several options: In theory there is
@@ -1529,7 +1531,7 @@
tp->fin_seq = skb->seq;

tcp_set_state(sk, TCP_ESTABLISHED);
- tcp_parse_options(th,tp);
+ tcp_parse_options(th,tp,0);
/* FIXME: need to make room for SACK still */
if (tp->wscale_ok == 0) {
tp->snd_wscale = tp->rcv_wscale = 0;
@@ -1572,7 +1574,7 @@
* tcp_connect.
*/
tcp_set_state(sk, TCP_SYN_RECV);
- tcp_parse_options(th,tp);
+ tcp_parse_options(th,tp,0);
if (tp->saw_tstamp) {
tp->ts_recent = tp->rcv_tsval;
tp->ts_recent_stamp = jiffies;
@@ -1614,6 +1616,8 @@
sk->shutdown = SHUTDOWN_MASK;

isn = tp->rcv_nxt + 128000;
+ if (isn == 0)
+ isn++;

sk = tp->af_specific->get_sock(skb, th);

@@ -1708,8 +1712,10 @@
tp->snd_wl1 = skb->seq;
tp->snd_wl2 = skb->ack_seq;

- } else
+ } else {
+ printk(KERN_DEBUG "bad ack\n");
return 1;
+ }
break;

case TCP_FIN_WAIT1:
Only in linux/net/ipv4: tcp_input.c~
diff -x .* --recursive -u linux-clean/net/ipv4/tcp_ipv4.c linux/net/ipv4/tcp_ipv4.c
--- linux-clean/net/ipv4/tcp_ipv4.c Thu May 15 00:01:21 1997
+++ linux/net/ipv4/tcp_ipv4.c Tue Jun 3 00:55:30 1997
@@ -30,6 +30,9 @@
* David S. Miller : Change semantics of established hash,
* half is devoted to TIME_WAIT sockets
* and the rest go in the other half.
+ * Andi Kleen : Add support for syncookies and fixed
+ * some bugs: ip options weren't passed to
+ * the TCP layer, missed a check for an ACK bit.
*/

#include <linux/config.h>
@@ -48,6 +51,8 @@
extern int sysctl_tcp_tsack;
extern int sysctl_tcp_timestamps;
extern int sysctl_tcp_window_scaling;
+extern int sysctl_tcp_syncookies;
+extern int sysctl_tcp_always_syncookie;

static void tcp_v4_send_reset(struct sk_buff *skb);

@@ -403,7 +408,7 @@

#endif

-static __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
+static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
{
return secure_tcp_sequence_number(sk->saddr, sk->daddr,
skb->h.th->dest,
@@ -835,6 +840,8 @@

/* Don't offer more than they did.
* This way we don't have to memorize who said what.
+ * FIXME: maybe this should be changed for better performance
+ * with syncookies.
*/
req->mss = min(mss, req->mss);

@@ -891,17 +898,13 @@
sizeof(struct ip_options) + req->af.v4_req.opt->optlen);
}

-static struct or_calltable or_ipv4 = {
+struct or_calltable or_ipv4 = {
tcp_v4_send_synack,
tcp_v4_or_free
};

-static int tcp_v4_syn_filter(struct sock *sk, struct sk_buff *skb, __u32 saddr)
-{
- return 0;
-}
-
-int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr, __u32 isn)
+int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr,
+ __u32 isn)
{
struct ip_options *opt = (struct ip_options *) ptr;
struct tcp_opt tp;
@@ -909,6 +912,7 @@
struct tcphdr *th = skb->h.th;
__u32 saddr = skb->nh.iph->saddr;
__u32 daddr = skb->nh.iph->daddr;
+ int want_cookie = sysctl_tcp_always_syncookie;

/* If the socket is dead, don't accept the connection. */
if (sk->dead) {
@@ -917,15 +921,31 @@
return -ENOTCONN;
}

- if (sk->ack_backlog >= sk->max_ack_backlog ||
- tcp_v4_syn_filter(sk, skb, saddr)) {
- SOCK_DEBUG(sk, "dropping syn ack:%d max:%d\n", sk->ack_backlog,
- sk->max_ack_backlog);
-#ifdef CONFIG_IP_TCPSF
- tcp_v4_random_drop(sk);
+ if (sk->ack_backlog >= sk->max_ack_backlog || want_cookie) {
+#ifdef CONFIG_SYN_COOKIES
+ if (sysctl_tcp_syncookies) {
+ static unsigned long warntime;
+
+ if (!want_cookie && (jiffies - warntime > HZ*60)) {
+ warntime = jiffies;
+ printk(KERN_INFO
+ "possible SYN flooding on port %d. Sending cookies.\n", ntohs(skb->h.th->dest));
+ }
+ want_cookie = 1;
+ } else
#endif
- tcp_statistics.TcpAttemptFails++;
- goto exit;
+ {
+ if (want_cookie) { want_cookie=0; goto ok; }
+ SOCK_DEBUG(sk, "dropping syn ack:%d max:%d\n", sk->ack_backlog,
+ sk->max_ack_backlog);
+ tcp_statistics.TcpAttemptFails++;
+ goto exit;
+ }
+ } else {
+ ok:
+ if (isn == 0)
+ isn = tcp_v4_init_sequence(sk, skb);
+ sk->ack_backlog++;
}

req = tcp_openreq_alloc();
@@ -934,15 +954,12 @@
goto exit;
}

- sk->ack_backlog++;
-
req->rcv_wnd = 0; /* So that tcp_send_synack() knows! */

req->rcv_isn = skb->seq;
- req->snt_isn = isn;
- tp.tstamp_ok = tp.sack_ok = tp.wscale_ok = tp.snd_wscale = 0;
+ tp.tstamp_ok = tp.sack_ok = tp.wscale_ok = tp.snd_wscale = 0;
tp.in_mss = 536;
- tcp_parse_options(th,&tp);
+ tcp_parse_options(th,&tp, want_cookie);
if (tp.saw_tstamp)
req->ts_recent = tp.rcv_tsval;
req->mss = tp.in_mss;
@@ -954,8 +971,17 @@
req->af.v4_req.loc_addr = daddr;
req->af.v4_req.rmt_addr = saddr;

+ /* Note that we ignore the isn passed from the TIME_WAIT
+ * state here. That's the price we pay for cookies.
+ */
+ if (want_cookie)
+ isn = cookie_v4_init_sequence(sk, skb, &req->mss);
+
+ req->snt_isn = isn;
+
/* IPv4 options */
req->af.v4_req.opt = NULL;
+
if (opt && opt->optlen) {
int opt_size = sizeof(struct ip_options) + opt->optlen;

@@ -973,9 +999,15 @@

tcp_v4_send_synack(sk, req);

- req->expires = jiffies + TCP_TIMEOUT_INIT;
- tcp_inc_slow_timer(TCP_SLT_SYNACK);
- tcp_synq_queue(&sk->tp_pinfo.af_tcp, req);
+ if (want_cookie) {
+ if (req->af.v4_req.opt)
+ kfree(req->af.v4_req.opt);
+ tcp_openreq_free(req);
+ } else {
+ req->expires = jiffies + TCP_TIMEOUT_INIT;
+ tcp_inc_slow_timer(TCP_SLT_SYNACK);
+ tcp_synq_queue(&sk->tp_pinfo.af_tcp, req);
+ }

sk->data_ready(sk, 0);

@@ -985,24 +1017,30 @@
}

struct sock * tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
- struct open_request *req)
+ struct open_request *req,
+ struct dst_entry *dst)
{
struct tcp_opt *newtp;
struct sock *newsk;
- struct rtable *rt;
int snd_mss;

+ if (!skb->h.th->ack)
+ return NULL;
+
newsk = sk_alloc(GFP_ATOMIC);
- if (newsk == NULL)
+ if (newsk == NULL) {
+ if (dst)
+ dst_release(dst);
return NULL;
+ }

memcpy(newsk, sk, sizeof(*newsk));

/* Or else we die! -DaveM */
newsk->sklist_next = NULL;

- newsk->opt = NULL;
- newsk->dst_cache = NULL;
+ newsk->opt = req->af.v4_req.opt;
+
skb_queue_head_init(&newsk->write_queue);
skb_queue_head_init(&newsk->receive_queue);
skb_queue_head_init(&newsk->out_of_order_queue);
@@ -1072,17 +1110,21 @@
newsk->rcv_saddr = req->af.v4_req.loc_addr;

/* options / mss / route_cache */
- newsk->opt = req->af.v4_req.opt;
- if (ip_route_output(&rt,
- newsk->opt && newsk->opt->srr ? newsk->opt->faddr : newsk->daddr,
- newsk->saddr, newsk->ip_tos, NULL)) {
- kfree(newsk);
- return NULL;
- }
-
- newsk->dst_cache = &rt->u.dst;
-
- snd_mss = rt->u.dst.pmtu;
+ if (dst == NULL) {
+ struct rtable *rt;
+
+ if (ip_route_output(&rt,
+ newsk->opt && newsk->opt->srr ?
+ newsk->opt->faddr : newsk->daddr,
+ newsk->saddr, newsk->ip_tos, NULL)) {
+ kfree(newsk);
+ return NULL;
+ }
+ dst = &rt->u.dst;
+ }
+ newsk->dst_cache = dst;
+
+ snd_mss = dst->pmtu;

/* FIXME: is mtu really the same as snd_mss? */
newsk->mtu = snd_mss;
@@ -1124,7 +1166,7 @@
return newsk;
}

-struct sock *tcp_v4_check_req(struct sock *sk, struct sk_buff *skb)
+static inline struct sock *tcp_v4_check_req(struct sock *sk, struct sk_buff *skb, struct ip_options *opt)
{
struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
struct open_request *req = tp->syn_wait_queue;
@@ -1133,8 +1175,13 @@
* as we checked the user count on tcp_rcv and we're
* running from a soft interrupt.
*/
- if(!req)
- return sk;
+ if(!req) {
+#ifdef CONFIG_SYN_COOKIES
+ goto checkcookie;
+#else
+ return NULL;
+#endif
+ }

while(req) {
if (req->af.v4_req.rmt_addr == skb->nh.iph->saddr &&
@@ -1147,7 +1194,7 @@
* yet accepted()...
*/
sk = req->sk;
- break;
+ goto ende;
}

/* Check for syn retransmission */
@@ -1161,20 +1208,25 @@
return NULL;
}

- sk = tp->af_specific->syn_recv_sock(sk, skb, req);
+ sk = tp->af_specific->syn_recv_sock(sk, skb, req, NULL);
tcp_dec_slow_timer(TCP_SLT_SYNACK);
if (sk == NULL)
return NULL;

req->expires = 0UL;
req->sk = sk;
- break;
+ goto ende;
}
req = req->dl_next;
}

- skb_orphan(skb);
- skb_set_owner_r(skb, sk);
+#ifdef CONFIG_SYN_COOKIES
+checkcookie:
+ sk = cookie_v4_check(sk, skb, opt);
+#endif
+ende: skb_orphan(skb);
+ if (sk)
+ skb_set_owner_r(skb, sk);
return sk;
}

@@ -1195,20 +1247,28 @@
goto ok;
}

- if (sk->state == TCP_LISTEN) {
- struct sock *nsk;
+ /*
+ * We check packets with only the SYN bit set against the
+ * open_request queue too: This increases connection latency a bit,
+ * but is required to detect retransmitted SYNs.
+ *
+ * The ACK bit check is probably not needed here because
+ * it is checked in syn_recv_sock again (we play it safe now).
+ */
+ if (sk->state == TCP_LISTEN && (skb->h.th->ack || skb->h.th->syn)) {
+ struct sock *nsk;

- /* Find possible connection requests. */
- nsk = tcp_v4_check_req(sk, skb);
- if (nsk == NULL)
+ /* Find possible connection requests. */
+ nsk = tcp_v4_check_req(sk, skb, &(IPCB(skb)->opt));
+ if (nsk == NULL)
goto discard_it;
-
- release_sock(sk);
- lock_sock(nsk);
+
+ release_sock(sk);
+ lock_sock(nsk);
sk = nsk;
}

- if (tcp_rcv_state_process(sk, skb, skb->h.th, NULL, skb->len) == 0)
+ if (tcp_rcv_state_process(sk, skb, skb->h.th, &(IPCB(skb)->opt), skb->len) == 0)
goto ok;

reset:
@@ -1352,7 +1412,6 @@
tcp_v4_rebuild_header,
tcp_v4_conn_request,
tcp_v4_syn_recv_sock,
- tcp_v4_init_sequence,
tcp_v4_get_sock,
ip_setsockopt,
ip_getsockopt,
Only in linux/net/ipv4: tcp_ipv4.c~
diff -x .* --recursive -u linux-clean/net/ipv4/tcp_timer.c linux/net/ipv4/tcp_timer.c
--- linux-clean/net/ipv4/tcp_timer.c Thu May 15 00:01:21 1997
+++ linux/net/ipv4/tcp_timer.c Thu May 29 22:40:38 1997
@@ -22,7 +22,11 @@

#include <net/tcp.h>

-int sysctl_syn_retries = TCP_SYN_RETRIES;
+int sysctl_tcp_syn_retries = TCP_SYN_RETRIES;
+int sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME;
+int sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES;
+int sysctl_tcp_retries1 = TCP_RETR1;
+int sysctl_tcp_retries2 = TCP_RETR2;

static void tcp_sltimer_handler(unsigned long);
static void tcp_syn_recv_timer(unsigned long);
@@ -172,7 +176,7 @@
/* Eric, what the heck is this doing?!?! */
tp->retransmits && !(tp->retransmits & 7)) ||

- (sk->state != TCP_ESTABLISHED && tp->retransmits > TCP_RETR1)) {
+ (sk->state != TCP_ESTABLISHED && tp->retransmits > sysctl_tcp_retries1)) {
/* Attempt to recover if arp has changed (unlikely!) or
* a route has shifted (not supported prior to 1.3).
*/
@@ -180,7 +184,7 @@
}

/* Have we tried to SYN too many times (repent repent 8)) */
- if(tp->retransmits > sysctl_syn_retries && sk->state==TCP_SYN_SENT) {
+ if(tp->retransmits > sysctl_tcp_syn_retries && sk->state==TCP_SYN_SENT) {
if(sk->err_soft)
sk->err=sk->err_soft;
else
@@ -198,7 +202,7 @@
}

/* Has it gone just too far? */
- if (tp->retransmits > TCP_RETR2) {
+ if (tp->retransmits > sysctl_tcp_retries2) {
if(sk->err_soft)
sk->err = sk->err_soft;
else
@@ -251,7 +255,7 @@
* FIXME: We ought not to do it, Solaris 2.5 actually has fixing
* this behaviour in Solaris down as a bug fix. [AC]
*/
- if (tp->probes_out > TCP_RETR2) {
+ if (tp->probes_out > sysctl_tcp_retries2) {
if(sk->err_soft)
sk->err = sk->err_soft;
else
@@ -281,8 +285,8 @@
struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
__u32 elapsed = jiffies - tp->rcv_tstamp;

- if (elapsed >= TCP_KEEPALIVE_TIME) {
- if (tp->probes_out > TCP_KEEPALIVE_PROBES) {
+ if (elapsed >= sysctl_tcp_keepalive_time) {
+ if (tp->probes_out > sysctl_tcp_keepalive_probes) {
if(sk->err_soft)
sk->err = sk->err_soft;
else
@@ -316,6 +320,8 @@
*/
#define MAX_KA_PROBES 5

+int sysctl_tcp_max_ka_probes = MAX_KA_PROBES;
+
/* Keepopen's are only valid for "established" TCP's, nicely our listener
* hash gets rid of most of the useless testing, so we run through a couple
* of the established hash chains each clock tick. -DaveM
@@ -341,7 +347,7 @@
while(sk) {
if(sk->keepopen) {
count += tcp_keepopen_proc(sk);
- if(count == MAX_KA_PROBES)
+ if(count == sysctl_tcp_max_ka_probes)
goto out;
}
sk = sk->next;
@@ -455,7 +461,7 @@
break;

tcp_synq_unlink(tp, conn);
- if (conn->retrans >= TCP_RETR1) {
+ if (conn->retrans >= sysctl_tcp_retries1) {
#ifdef TCP_DEBUG
printk(KERN_DEBUG "syn_recv: "
"too many retransmits\n");
Only in linux/net/ipv4: tcp_timer.c~
diff -x .* --recursive -u linux-clean/net/ipv4/utils.c linux/net/ipv4/utils.c
--- linux-clean/net/ipv4/utils.c Thu May 15 00:01:21 1997
+++ linux/net/ipv4/utils.c Mon Jun 2 20:35:14 1997
@@ -13,7 +13,7 @@
* Fixes:
* Alan Cox : verify_area check.
* Alan Cox : removed old debugging.
- *
+ * Andi Kleen : add net_ratelimit()
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -89,3 +89,24 @@
return(htonl(l));
}

+/*
+ * Rate limit for kernel messages so that a packet flood cannot turn into
+ * a log flood: returns 1 at most once per 5*HZ jiffies, 0 otherwise.
+ * Calls that got 0 are counted and the count is printed (then reset) when
+ * the next message is allowed. Guard all warning printk()s with this.
+ */
+int net_ratelimit(void)
+{
+	static unsigned long last_msg;
+	static int missed;
+
+	if ((jiffies - last_msg) < 5*HZ) {
+		missed++;
+		return 0;
+	}
+	if (missed)
+		printk(KERN_WARNING "ipv4: (%d messages suppressed. Flood?)\n", missed);
+	missed = 0;
+	last_msg = jiffies;
+	return 1;
+}
Only in linux/net/ipv4: utils.c~
diff -x .* --recursive -u linux-clean/net/ipv6/tcp_ipv6.c linux/net/ipv6/tcp_ipv6.c
--- linux-clean/net/ipv6/tcp_ipv6.c Mon May 12 19:35:45 1997
+++ linux/net/ipv6/tcp_ipv6.c Sun Jun 1 00:36:54 1997
@@ -695,7 +695,7 @@
* Can some kind of merge be done? -- erics
*/
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb, void *ptr,
- __u32 isn)
+ __u32 isn)
{
struct tcp_opt tp;
struct open_request *req;
@@ -711,6 +711,9 @@
if (skb->protocol == __constant_htons(ETH_P_IP))
return tcp_v4_conn_request(sk, skb, ptr, isn);

+ if (isn == 0)
+ isn = tcp_v6_init_sequence(sk,skb);
+
/*
* There are no SYN attacks on IPv6, yet...
*/
@@ -735,7 +738,7 @@
req->snt_isn = isn;
tp.tstamp_ok = tp.sack_ok = tp.wscale_ok = tp.snd_wscale = 0;
tp.in_mss = 536;
- tcp_parse_options(skb->h.th,&tp);
+ tcp_parse_options(skb->h.th,&tp,0);
if (tp.saw_tstamp)
req->ts_recent = tp.rcv_tsval;
req->mss = tp.in_mss;
@@ -778,10 +781,10 @@
}

static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
- struct open_request *req)
+ struct open_request *req,
+ struct dst_entry *dst)
{
struct ipv6_pinfo *np;
- struct dst_entry *dst;
struct flowi fl;
struct tcp_opt *newtp;
struct sock *newsk;
@@ -791,11 +794,11 @@
* v6 mapped
*/

- newsk = tcp_v4_syn_recv_sock(sk, skb, req);
+ newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);

- if (newsk == NULL)
+ if (newsk == NULL)
return NULL;
-
+
np = &newsk->net_pinfo.af_inet6;

ipv6_addr_set(&np->daddr, 0, 0, __constant_htonl(0x0000FFFF),
@@ -813,8 +816,11 @@
}

newsk = sk_alloc(GFP_ATOMIC);
- if (newsk == NULL)
+ if (newsk == NULL) {
+ if (dst)
+ dst_release(dst);
return NULL;
+ }

memcpy(newsk, sk, sizeof(*newsk));

@@ -902,18 +908,20 @@
ipv6_addr_copy(&np->rcv_saddr, &req->af.v6_req.loc_addr);
np->oif = req->af.v6_req.dev;

- /*
- * options / mss / route cache
- */
-
- fl.proto = IPPROTO_TCP;
- fl.nl_u.ip6_u.daddr = &np->daddr;
- fl.nl_u.ip6_u.saddr = &np->saddr;
- fl.dev = np->oif;
- fl.uli_u.ports.dport = newsk->dummy_th.dest;
- fl.uli_u.ports.sport = newsk->dummy_th.source;
-
- dst = ip6_route_output(newsk, &fl);
+ if (dst == NULL) {
+ /*
+ * options / mss / route cache
+ */
+
+ fl.proto = IPPROTO_TCP;
+ fl.nl_u.ip6_u.daddr = &np->daddr;
+ fl.nl_u.ip6_u.saddr = &np->saddr;
+ fl.dev = np->oif;
+ fl.uli_u.ports.dport = newsk->dummy_th.dest;
+ fl.uli_u.ports.sport = newsk->dummy_th.source;
+
+ dst = ip6_route_output(newsk, &fl);
+ }

ip6_dst_store(newsk, dst);

@@ -1051,7 +1059,7 @@
}

skb_orphan(skb);
- sk = tp->af_specific->syn_recv_sock(sk, skb, req);
+ sk = tp->af_specific->syn_recv_sock(sk, skb, req, NULL);

tcp_dec_slow_timer(TCP_SLT_SYNACK);

@@ -1308,7 +1316,6 @@
tcp_v6_rebuild_header,
tcp_v6_conn_request,
tcp_v6_syn_recv_sock,
- tcp_v6_init_sequence,
tcp_v6_get_sock,
ipv6_setsockopt,
ipv6_getsockopt,
@@ -1328,7 +1335,6 @@
tcp_v4_rebuild_header,
tcp_v6_conn_request,
tcp_v6_syn_recv_sock,
- tcp_v6_init_sequence,
tcp_v6_get_sock,
ipv6_setsockopt,
ipv6_getsockopt,
Only in linux/net/ipv6: tcp_ipv6.c~
diff -x .* --recursive -u linux-clean/net/unix/af_unix.c linux/net/unix/af_unix.c
--- linux-clean/net/unix/af_unix.c Mon May 12 19:35:45 1997
+++ linux/net/unix/af_unix.c Thu May 29 22:09:47 1997
@@ -87,6 +87,8 @@

#define min(a,b) (((a)<(b))?(a):(b))

+int sysctl_unix_delete_delay = HZ;
+int sysctl_unix_destroy_delay = 10*HZ;

unix_socket *unix_socket_table[UNIX_HASH_SIZE+1];

@@ -240,7 +242,7 @@
* Retry;
*/

- sk->timer.expires=jiffies+10*HZ; /* No real hurry try it every 10 seconds or so */
+ sk->timer.expires=jiffies+sysctl_unix_destroy_delay; /* No real hurry try it every 10 seconds or so */
add_timer(&sk->timer);
}

@@ -248,7 +250,7 @@
static void unix_delayed_delete(unix_socket *sk)
{
sk->timer.data=(unsigned long)sk;
- sk->timer.expires=jiffies+HZ; /* Normally 1 second after will clean up. After that we try every 10 */
+ sk->timer.expires=jiffies+sysctl_unix_delete_delay; /* Normally 1 second after will clean up. After that we try every 10 */
sk->timer.function=unix_destroy_timer;
add_timer(&sk->timer);
}
diff -x .* --recursive -u linux-clean/net/unix/sysctl_net_unix.c linux/net/unix/sysctl_net_unix.c
--- linux-clean/net/unix/sysctl_net_unix.c Thu Jan 2 14:13:35 1997
+++ linux/net/unix/sysctl_net_unix.c Thu May 29 22:27:21 1997
@@ -14,6 +14,15 @@
#include <linux/mm.h>
#include <linux/sysctl.h>

+extern int sysctl_unix_destroy_delay;
+extern int sysctl_unix_delete_delay;
+
ctl_table unix_table[] = {
+ {NET_UNIX_DESTROY_DELAY, "destroy_delay",
+ &sysctl_unix_destroy_delay, sizeof(int), 0644, NULL,
+ &proc_dointvec_jiffies},
+ {NET_UNIX_DELETE_DELAY, "delete_delay",
+ &sysctl_unix_delete_delay, sizeof(int), 0644, NULL,
+ &proc_dointvec_jiffies},
{0}
};
--- /dev/null Tue Jan 1 05:00:00 1980
+++ linux/net/ipv4/syncookies.c Tue Jun 3 01:10:15 1997
@@ -0,0 +1,221 @@
+/*
+ * Syncookies implementation for the Linux kernel
+ *
+ * Copyright (C) 1997 Andi Kleen
+ * Based on ideas by D.J.Bernstein and Eric Schenk.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * $Id$
+ *
+ * Missing: IPv6 support.
+ * Some counter so that the Administrator can see when the machine
+ * is under a syn flood attack.
+ */
+
+#include <linux/config.h>
+#if defined(CONFIG_SYN_COOKIES)
+#include <linux/tcp.h>
+#include <linux/malloc.h>
+#include <linux/random.h>
+#include <net/tcp.h>
+
+extern int sysctl_tcp_syncookies;
+
+static unsigned long tcp_lastsynq_overflow;
+
+/*
+ * MSS values a cookie can encode; must be sorted ascending. Only 8 distinct
+ * entries fit (the index is kept in cookie bits 0-2); the 9th slot repeats
+ * the 8th so a lookup that peeks at entry i+1 stays inside the array.
+ * XXX generate a better table. HIPPI with a 64k MSS is not well supported.
+ */
+static __u16 const msstab[] = {
+ 64,
+ 256,
+ 512,
+ 536,
+ 1024,
+ 1440,
+ 1460,
+ 4312,
+ 4312
+};
+
+/* Cookie for skb's addresses and ports, via secure_tcp_syn_cookie(). */
+static __u32 make_syncookie(struct sk_buff *skb, __u32 counter, __u32 seq)
+{
+	struct tcphdr *th = skb->h.th;
+	__u32 cookie;
+
+	cookie = secure_tcp_syn_cookie(skb->nh.iph->saddr, skb->nh.iph->daddr,
+				       th->source, th->dest, seq, counter);
+
+#if 0
+	printk(KERN_DEBUG
+	       "msc: z=%u,cnt=%u,seq=%u,sadr=%u,dadr=%u,sp=%u,dp=%u\n",
+	       cookie, counter, seq,
+	       skb->nh.iph->saddr, skb->nh.iph->daddr,
+	       ntohs(th->source), ntohs(th->dest));
+#endif
+
+	return cookie;
+}
+
+/*
+ * Generate a syncookie for skb and stamp tcp_lastsynq_overflow. *mssp is
+ * replaced by a msstab[] entry whose index is OR'd into the low cookie bits.
+ */
+__u32 cookie_v4_init_sequence(struct sock *sk, struct sk_buff *skb, __u16 *mssp)
+{
+	int i;
+	__u32 isn;
+	const __u16 mss = *mssp;
+
+	tcp_lastsynq_overflow = jiffies;
+
+	isn = make_syncookie(skb, (jiffies/HZ) >> 6, ntohl(skb->h.th->seq));
+
+	/* Pick the largest entry <= mss. An mss below msstab[0] (the peer
+	 * chooses it) maps to entry 0 instead of reading before the table.
+	 */
+	for (i = 7; i > 0; i--)
+		if (mss >= msstab[i])
+			break;
+	*mssp = msstab[i];
+
+	isn |= i;
+	return isn;
+}
+
+/* This value should be dependent on TCP_TIMEOUT_INIT and
+ * sysctl_tcp_retries1. It's a rather complicated formula
+ * (exponential backoff) to compute at runtime so it's currently hardcoded
+ * here.
+ */
+#define COUNTER_TRIES 4
+
+/*
+ * Check if an ack sequence number is a valid syncookie.
+ * Returns 0 if not, otherwise the msstab[] entry picked by the cookie's
+ * low three bits.
+ */
+static inline int cookie_check(struct sk_buff *skb, __u32 cookie)
+{
+	const __u32 seq = ntohl(skb->h.th->seq) - 1;
+	const int mssind = cookie & 7;
+	__u32 counter;
+	int age;
+
+	/* Last cookie sent too long ago (a zero stamp skips this test). */
+	if ((jiffies - tcp_lastsynq_overflow) > TCP_TIMEOUT_INIT &&
+	    tcp_lastsynq_overflow)
+		return 0;
+
+	cookie &= ~7;
+	counter = (jiffies/HZ) >> 6;
+
+	/* Try the current counter and the COUNTER_TRIES-1 values before it. */
+	for (age = 0; age < COUNTER_TRIES; age++)
+		if (make_syncookie(skb, counter - age, seq) == cookie)
+			return msstab[mssind];
+
+	return 0;
+}
+
+extern struct or_calltable or_ipv4;
+
+/* Create the socket for a validated cookie request and queue the request. */
+static inline struct sock *
+get_cookie_sock(struct sock *sk, struct sk_buff *skb, struct open_request *req,
+		struct dst_entry *dst)
+{
+	struct tcp_opt *listen_tp = &sk->tp_pinfo.af_tcp;
+	struct sock *child;
+
+	child = listen_tp->af_specific->syn_recv_sock(sk, skb, req, dst);
+	req->sk = child;	/* NOTE(review): may be NULL; req is queued anyway */
+	tcp_synq_queue(listen_tp, req);	/* Queue up for accept() */
+
+	return child;
+}
+
+/*
+ * If skb ACKs a valid syncookie, rebuild its open_request and return what
+ * get_cookie_sock() yields. Not a cookie ACK: return sk. Failure: NULL.
+ */
+struct sock *
+cookie_v4_check(struct sock *sk, struct sk_buff *skb, struct ip_options *opt)
+{
+	__u32 cookie = ntohl(skb->h.th->ack_seq)-1;
+	struct open_request *req;
+	int mss;
+	struct rtable *rt;
+
+	if (!sysctl_tcp_syncookies || !skb->h.th->ack)
+		return sk;
+
+	mss = cookie_check(skb, cookie);
+	if (mss == 0)
+		return sk;
+
+	req = tcp_openreq_alloc();
+	if (req == NULL)
+		return NULL;
+
+	req->rcv_isn = ntohl(skb->h.th->seq)-1;
+	req->snt_isn = cookie;
+	req->mss = mss;
+	req->rmt_port = skb->h.th->source;
+	req->af.v4_req.loc_addr = skb->nh.iph->daddr;
+	req->af.v4_req.rmt_addr = skb->nh.iph->saddr;
+	req->class = &or_ipv4; /* for safety */
+
+	/* We threw the options of the initial SYN away, so we hope
+	 * the ACK carries the same options again (see RFC1122 4.2.3.8).
+	 * Start from NULL so that the echoed copy made below is kept.
+	 */
+	req->af.v4_req.opt = NULL;
+	if (opt && opt->optlen) {
+		int opt_size = sizeof(struct ip_options) + opt->optlen;
+
+		req->af.v4_req.opt = kmalloc(opt_size, GFP_ATOMIC);
+		if (req->af.v4_req.opt &&
+		    ip_options_echo(req->af.v4_req.opt, skb)) {
+			kfree_s(req->af.v4_req.opt, opt_size);
+			req->af.v4_req.opt = NULL;
+		}
+	}
+
+	req->snd_wscale = req->rcv_wscale = req->tstamp_ok = 0;
+	req->wscale_ok = 0;
+	req->expires = 0UL;
+	req->retrans = 0;
+
+	/*
+	 * Look up the route to get at the window size. It may have changed
+	 * since the original syn arrived, but there is no easy way to check.
+	 */
+	if (ip_route_output(&rt,
+			    opt && opt->srr ? opt->faddr :
+			    req->af.v4_req.rmt_addr,req->af.v4_req.loc_addr,
+			    sk->ip_tos, NULL)) {
+		if (req->af.v4_req.opt)
+			kfree(req->af.v4_req.opt);
+		tcp_openreq_free(req);
+		return NULL;
+	}
+
+	/* Try to redo what tcp_v4_send_synack did. */
+	req->window_clamp = rt->u.dst.window;
+	tcp_select_initial_window(sock_rspace(sk)/2,req->mss,
+				  &req->rcv_wnd, &req->window_clamp,
+				  0, &req->rcv_wscale);
+
+	return get_cookie_sock(sk, skb, req, &rt->u.dst);
+}
+
+#endif