Re: [PATCH net-next] inet: add ip_local_port_step_width sysctl to improve port usage distribution
From: Kuniyuki Iwashima
Date: Wed Feb 25 2026 - 01:34:20 EST
On Tue, Feb 24, 2026 at 7:05 AM Fernando Fernandez Mancera
<fmancera@xxxxxxx> wrote:
>
> With the current port selection algorithm, ports after a reserved port
> range or a port that has been in use for a long time are used more often than others [1]. This
> causes an uneven port usage distribution. This combines with cloud
> environments blocking connections between the application server and the
> database server if there was a previous connection with the same source
> port, leading to connectivity problems between applications on cloud
> environments.
>
> The real issue here is that these firewalls cannot cope with
> standards-compliant port reuse. This is a workaround for such situations
> and an improvement on the distribution of ports selected.
>
> The proposed solution is to implement a variant of RFC 6056 Algorithm 5.
> The step size is selected randomly on every connect() call, ensuring it
> is coprime with the size of the range of ports we want to
> scan. This way, we can ensure that all ports within the range are
> scanned before returning an error. To enable this algorithm, the user
> must configure the new sysctl option "net.ipv4.ip_local_port_step_width".
>
> In addition, on graphs generated we can observe that the distribution of
> source ports is more even with the proposed approach. [2]
>
> [1] https://0xffsoftware.com/port_graph_current_alg.html
>
> [2] https://0xffsoftware.com/port_graph_random_step_alg.html
>
> Signed-off-by: Fernando Fernandez Mancera <fmancera@xxxxxxx>
> ---
> Documentation/networking/ip-sysctl.rst | 9 ++++++++
> .../net_cachelines/netns_ipv4_sysctl.rst | 1 +
> include/net/netns/ipv4.h | 1 +
> net/ipv4/inet_hashtables.c | 22 ++++++++++++++++---
> net/ipv4/sysctl_net_ipv4.c | 7 ++++++
> 5 files changed, 37 insertions(+), 3 deletions(-)
>
> diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
> index 6921d8594b84..9e2625ee778c 100644
> --- a/Documentation/networking/ip-sysctl.rst
> +++ b/Documentation/networking/ip-sysctl.rst
> @@ -1612,6 +1612,15 @@ ip_local_reserved_ports - list of comma separated ranges
>
> Default: Empty
>
> +ip_local_port_step_width - INTEGER
> + Defines the numerical maximum increment between successive port
> + allocations within the ephemeral port range when an unavailable port is
> + reached. This can be used to mitigate accumulated nodes in port
> + distribution when reserved ports have been configured. Please note that
> + port collisions may be more frequent in a system with a very high load.
> +
> + Default: 0 (disabled)
> +
> ip_unprivileged_port_start - INTEGER
> This is a per-namespace sysctl. It defines the first
> unprivileged port in the network namespace. Privileged ports
> diff --git a/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst b/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst
> index beaf1880a19b..c0e194a6e4ee 100644
> --- a/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst
> +++ b/Documentation/networking/net_cachelines/netns_ipv4_sysctl.rst
> @@ -47,6 +47,7 @@ u8 sysctl_tcp_ecn
> u8 sysctl_tcp_ecn_fallback
> u8 sysctl_ip_default_ttl ip4_dst_hoplimit/ip_select_ttl
> u8 sysctl_ip_no_pmtu_disc
> +u32 sysctl_ip_local_port_step_width
> u8 sysctl_ip_fwd_use_pmtu read_mostly ip_dst_mtu_maybe_forward/ip_skb_dst_mtu
> u8 sysctl_ip_fwd_update_priority ip_forward
> u8 sysctl_ip_nonlocal_bind
> diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
> index 8e971c7bf164..fb7c2235af21 100644
> --- a/include/net/netns/ipv4.h
> +++ b/include/net/netns/ipv4.h
> @@ -166,6 +166,7 @@ struct netns_ipv4 {
> u8 sysctl_ip_autobind_reuse;
> /* Shall we try to damage output packets if routing dev changes? */
> u8 sysctl_ip_dynaddr;
> + u32 sysctl_ip_local_port_step_width;
> #ifdef CONFIG_NET_L3_MASTER_DEV
> u8 sysctl_raw_l3mdev_accept;
> #endif
> diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
> index f5826ec4bcaa..1992dc21818f 100644
> --- a/net/ipv4/inet_hashtables.c
> +++ b/net/ipv4/inet_hashtables.c
> @@ -16,6 +16,7 @@
> #include <linux/wait.h>
> #include <linux/vmalloc.h>
> #include <linux/memblock.h>
> +#include <linux/gcd.h>
>
> #include <net/addrconf.h>
> #include <net/inet_connection_sock.h>
> @@ -1046,12 +1047,12 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
> struct net *net = sock_net(sk);
> struct inet_bind2_bucket *tb2;
> struct inet_bind_bucket *tb;
> + int step, scan_step, l3mdev;
> + u32 index, max_rand_step;
> bool tb_created = false;
> u32 remaining, offset;
> int ret, i, low, high;
> bool local_ports;
> - int step, l3mdev;
> - u32 index;
>
> if (port) {
> local_bh_disable();
> @@ -1065,6 +1066,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
>
> local_ports = inet_sk_get_local_port_range(sk, &low, &high);
> step = local_ports ? 1 : 2;
> + scan_step = step;
> + max_rand_step = READ_ONCE(net->ipv4.sysctl_ip_local_port_step_width);
>
> high++; /* [32768, 60999] -> [32768, 61000[ */
> remaining = high - low;
> @@ -1083,9 +1086,22 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
> */
> if (!local_ports)
> offset &= ~1U;
> +
> + if (max_rand_step && remaining > 1) {
> + u32 range = (step == 1) ? remaining : (remaining / 2);
> + u32 upper_bound = min(range, max_rand_step);
> +
> + scan_step = get_random_u32_inclusive(1, upper_bound);
> + while (gcd(scan_step, range) != 1) {
> + scan_step++;
If both scan_step and range are even, gcd() can never be 1, so an extra
increment here would skip that candidate and save about half of the
gcd() calls.
> + if (unlikely(scan_step > upper_bound))
> + scan_step = 1;
> + }
> + scan_step *= step;
> + }
> other_parity_scan:
Wouldn't the "other_parity_scan" pass be redundant
unless scan_step is 2?
> port = low + offset;
> - for (i = 0; i < remaining; i += step, port += step) {
> + for (i = 0; i < remaining; i += step, port += scan_step) {
> if (unlikely(port >= high))
> port -= remaining;
> if (inet_is_local_reserved_port(net, port))
> diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
> index 643763bc2142..c533374f656c 100644
> --- a/net/ipv4/sysctl_net_ipv4.c
> +++ b/net/ipv4/sysctl_net_ipv4.c
> @@ -822,6 +822,13 @@ static struct ctl_table ipv4_net_table[] = {
> .mode = 0644,
> .proc_handler = ipv4_local_port_range,
> },
> + {
> + .procname = "ip_local_port_step_width",
> + .maxlen = sizeof(u32),
> + .data = &init_net.ipv4.sysctl_ip_local_port_step_width,
> + .mode = 0644,
> + .proc_handler = proc_douintvec,
> + },
> {
> .procname = "ip_local_reserved_ports",
> .data = &init_net.ipv4.sysctl_local_reserved_ports,
> --
> 2.53.0
>