Soft lockup in "__udp4_lib_lookup", maybe a GCC bug

From: Cai, Jason
Date: Wed Apr 05 2017 - 12:43:01 EST


Hi guys,

I'm using linux-3.2. Yes, I know it's pretty old, and I'm going to
move to the latest stable version.

I hit a soft lockup in the function `__udp4_lib_lookup`. It turns out
that the soft lockup happens because the lookup got an hlist_nulls_node
from one hash slot, but that hlist_nulls_node belongs to another hash
slot, so the code spins in the following loop:

```
begin:
result = NULL;
badness = -1;
sk_nulls_for_each_rcu(sk, node, &hslot->head) {
score = compute_score(sk, net, saddr, hnum, sport,
daddr, dport, dif);
if (score > badness) {
result = sk;
badness = score;
}
}
/*
* if the nulls value we got at the end of this lookup is
* not the expected one, we must restart lookup.
* We probably met an item that was moved to another chain.
*/
if (get_nulls_value(node) != slot)
goto begin;

```

After analyzing the disassembly, I suspect that GCC may be at fault:
it appears to incorrectly reuse the register `r8`, so that the code
does not re-read `hslot->head` when restarting `sk_nulls_for_each_rcu()`.

The GCC I'm using is 4.5.1, which is also pretty old, yes, I know.
Please look at the following disassembly (I added some inline comments):

Dump of assembler code for function __udp4_lib_lookup:
linux-3.2/net/ipv4/udp.c:
451 {
0xffffffff8134c98f <+0>: push %rbp
0xffffffff8134c990 <+1>: mov %rsp,%rbp
0xffffffff8134c993 <+4>: push %r15
0xffffffff8134c995 <+6>: push %r14
0xffffffff8134c997 <+8>: push %r13
0xffffffff8134c999 <+10>: push %r12
0xffffffff8134c99b <+12>: push %rbx
0xffffffff8134c99c <+13>: sub $0x48,%rsp
0xffffffff8134c9a0 <+17>: callq 0xffffffff813a2e80 <mcount>

include/linux/swab.h:
51 return ___constant_swab16(val);
0xffffffff8134c9a5 <+22>: rol $0x8,%r8w

/linux-3.2/net/ipv4/udp.c:
451 {
0xffffffff8134c9aa <+27>: mov 0x10(%rbp),%r13

include/linux/swab.h:
51 return ___constant_swab16(val);
0xffffffff8134c9ae <+31>: mov %r8w,-0x32(%rbp)

/linux-3.2/net/ipv4/udp.c:
451 {
0xffffffff8134c9b3 <+36>: mov %ecx,%r15d

452 struct sock *sk, *result;
453 struct hlist_nulls_node *node;
454 unsigned short hnum = ntohs(dport);
455 unsigned int hash2, slot2, slot = udp_hashfn(net, hnum, udptable->mask);
0xffffffff8134c9b6 <+39>: mov 0x10(%r13),%r8d
0xffffffff8134c9ba <+43>: movzwl -0x32(%rbp),%r14d

include/net/netns/hash.h:
16 return (unsigned)(((unsigned long)net) >> L1_CACHE_SHIFT);
0xffffffff8134c9bf <+48>: mov %rdi,%rax

/linux-3.2/net/ipv4/udp.c:
451 {
0xffffffff8134c9c2 <+51>: mov %rdi,%r12

include/net/netns/hash.h:
16 return (unsigned)(((unsigned long)net) >> L1_CACHE_SHIFT);
0xffffffff8134c9c5 <+54>: shr $0x6,%rax

/linux-3.2/net/ipv4/udp.c:
451 {
0xffffffff8134c9c9 <+58>: mov %esi,-0x38(%rbp)

include/linux/udp.h:
52 return (num + net_hash_mix(net)) & mask;
0xffffffff8134c9cc <+61>: lea (%r14,%rax,1),%eax

/linux-3.2/net/ipv4/udp.c:
451 {
0xffffffff8134c9d0 <+65>: mov %r9d,-0x3c(%rbp)

456 struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
0xffffffff8134c9d4 <+69>: and %r8d,%eax

451 {
0xffffffff8134c9d7 <+72>: mov %dx,-0x3e(%rbp)

456 struct udp_hslot *hslot2, *hslot = &udptable->hash[slot];
0xffffffff8134c9db <+76>: mov %rax,%rbx
0xffffffff8134c9de <+79>: mov %rax,-0x48(%rbp)
0xffffffff8134c9e2 <+83>: shl $0x5,%rbx
0xffffffff8134c9e6 <+87>: add 0x0(%r13),%rbx
^~~~~~~~~~~~~~ rbx is hslot

457 int score, badness;
458
459 rcu_read_lock();
460 if (hslot->count > 10) {
0xffffffff8134c9ea <+91>: mov 0x8(%rbx),%ecx
0xffffffff8134c9ed <+94>: cmp $0xa,%ecx
0xffffffff8134c9f0 <+97>: jle 0xffffffff8134ca9e <__udp4_lib_lookup+271>

461 hash2 = udp4_portaddr_hash(net, daddr, hnum);
0xffffffff8134c9f6 <+103>: mov %r14d,%edx
0xffffffff8134c9f9 <+106>: mov %ecx,-0x60(%rbp)
0xffffffff8134c9fc <+109>: mov %r8d,-0x58(%rbp)
0xffffffff8134ca00 <+113>: mov %r15d,%esi
0xffffffff8134ca03 <+116>: callq 0xffffffff8134a74f <udp4_portaddr_hash>

462 slot2 = hash2 & udptable->mask;
0xffffffff8134ca08 <+121>: mov -0x58(%rbp),%r8d

464 if (hslot->count < hslot2->count)
0xffffffff8134ca0c <+125>: mov -0x60(%rbp),%ecx

462 slot2 = hash2 & udptable->mask;
0xffffffff8134ca0f <+128>: and %r8d,%eax

463 hslot2 = &udptable->hash2[slot2];
0xffffffff8134ca12 <+131>: mov %eax,%edx
0xffffffff8134ca14 <+133>: shl $0x5,%rdx
0xffffffff8134ca18 <+137>: add 0x8(%r13),%rdx

464 if (hslot->count < hslot2->count)
0xffffffff8134ca1c <+141>: cmp 0x8(%rdx),%ecx
0xffffffff8134ca1f <+144>: jl 0xffffffff8134ca9e <__udp4_lib_lookup+271>

465 goto begin;
466
467 result = udp4_lib_lookup2(net, saddr, sport,
0xffffffff8134ca21 <+146>: movzwl -0x3e(%rbp),%ecx
0xffffffff8134ca25 <+150>: mov %rdx,(%rsp)
0xffffffff8134ca29 <+154>: mov %ecx,-0x4c(%rbp)
0xffffffff8134ca2c <+157>: mov %r12,%rdi
0xffffffff8134ca2f <+160>: mov %eax,0x8(%rsp)
0xffffffff8134ca33 <+164>: mov -0x3c(%rbp),%r9d
0xffffffff8134ca37 <+168>: mov %r14d,%r8d
0xffffffff8134ca3a <+171>: mov %r15d,%ecx
0xffffffff8134ca3d <+174>: mov -0x4c(%rbp),%edx
0xffffffff8134ca40 <+177>: mov -0x38(%rbp),%esi
0xffffffff8134ca43 <+180>: callq 0xffffffff8134c7bd <udp4_lib_lookup2>
0xffffffff8134ca48 <+185>: mov %rax,%rdi

468 daddr, hnum, dif,
469 hslot2, slot2);
470 if (!result) {
0xffffffff8134ca4b <+188>: test %rax,%rax
0xffffffff8134ca4e <+191>: jne 0xffffffff8134cc0f <__udp4_lib_lookup+640>

471 hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum);
0xffffffff8134ca54 <+197>: mov %r14d,%edx
0xffffffff8134ca57 <+200>: xor %esi,%esi
0xffffffff8134ca59 <+202>: mov %r12,%rdi
0xffffffff8134ca5c <+205>: callq 0xffffffff8134a74f <udp4_portaddr_hash>

472 slot2 = hash2 & udptable->mask;
0xffffffff8134ca61 <+210>: and 0x10(%r13),%eax

473 hslot2 = &udptable->hash2[slot2];
0xffffffff8134ca65 <+214>: mov %eax,%edx
0xffffffff8134ca67 <+216>: shl $0x5,%rdx
0xffffffff8134ca6b <+220>: add 0x8(%r13),%rdx

474 if (hslot->count < hslot2->count)
0xffffffff8134ca6f <+224>: mov 0x8(%rdx),%ecx
0xffffffff8134ca72 <+227>: cmp %ecx,0x8(%rbx)
0xffffffff8134ca75 <+230>: jl 0xffffffff8134ca9e <__udp4_lib_lookup+271>

475 goto begin;
476
477 result = udp4_lib_lookup2(net, saddr, sport,
0xffffffff8134ca77 <+232>: mov %rdx,(%rsp)
0xffffffff8134ca7b <+236>: mov %r12,%rdi
0xffffffff8134ca7e <+239>: mov %eax,0x8(%rsp)
0xffffffff8134ca82 <+243>: mov -0x3c(%rbp),%r9d
0xffffffff8134ca86 <+247>: mov %r14d,%r8d
0xffffffff8134ca89 <+250>: xor %ecx,%ecx
0xffffffff8134ca8b <+252>: mov -0x4c(%rbp),%edx
0xffffffff8134ca8e <+255>: mov -0x38(%rbp),%esi
0xffffffff8134ca91 <+258>: callq 0xffffffff8134c7bd <udp4_lib_lookup2>
0xffffffff8134ca96 <+263>: mov %rax,%rdi
0xffffffff8134ca99 <+266>: jmpq 0xffffffff8134cc0f <__udp4_lib_lookup+640>
0xffffffff8134ca9e <+271>: mov -0x32(%rbp),%r13w

487 sk_nulls_for_each_rcu(sk, node, &hslot->head) {
0xffffffff8134caa3 <+276>: mov (%rbx),%r8 <==
^~~~~~~~~~ Here! hslot->head is loaded into r8!

0xffffffff8134caa6 <+279>: jmpq 0xffffffff8134cb2c <__udp4_lib_lookup+413>

333 if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
0xffffffff8134caab <+284>: cmp %r13w,-0x30(%rcx)
0xffffffff8134cab0 <+289>: jne 0xffffffff8134cb27 <__udp4_lib_lookup+408>

334 !ipv6_only_sock(sk)) {
0xffffffff8134cab2 <+291>: mov 0xc(%rsi),%eax

333 if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
0xffffffff8134cab5 <+294>: cmp $0xa,%ax
0xffffffff8134cab9 <+298>: jne 0xffffffff8134cac9 <__udp4_lib_lookup+314>

334 !ipv6_only_sock(sk)) {
0xffffffff8134cabb <+300>: mov 0x270(%rsi),%r9
0xffffffff8134cac2 <+307>: testb $0x10,0x6a(%r9)
0xffffffff8134cac7 <+312>: jne 0xffffffff8134cb27 <__udp4_lib_lookup+408>

335 struct inet_sock *inet = inet_sk(sk);
336
337 score = (sk->sk_family == PF_INET ? 1 : 0);
0xffffffff8134cac9 <+314>: cmp $0x2,%ax

338 if (inet->inet_rcv_saddr) {
0xffffffff8134cacd <+318>: mov 0x4(%rsi),%r9d

337 score = (sk->sk_family == PF_INET ? 1 : 0);
0xffffffff8134cad1 <+322>: sete %al

338 if (inet->inet_rcv_saddr) {
0xffffffff8134cad4 <+325>: test %r9d,%r9d

337 score = (sk->sk_family == PF_INET ? 1 : 0);
0xffffffff8134cad7 <+328>: movzbl %al,%eax

338 if (inet->inet_rcv_saddr) {
0xffffffff8134cada <+331>: je 0xffffffff8134cae4 <__udp4_lib_lookup+341>

339 if (inet->inet_rcv_saddr != daddr)
0xffffffff8134cadc <+333>: cmp %r15d,%r9d
0xffffffff8134cadf <+336>: jne 0xffffffff8134cb27 <__udp4_lib_lookup+408>

340 return -1;
341 score += 2;
0xffffffff8134cae1 <+338>: add $0x2,%eax

342 }
343 if (inet->inet_daddr) {
0xffffffff8134cae4 <+341>: mov (%rsi),%r9d
0xffffffff8134cae7 <+344>: test %r9d,%r9d
0xffffffff8134caea <+347>: je 0xffffffff8134caf5 <__udp4_lib_lookup+358>

344 if (inet->inet_daddr != saddr)
0xffffffff8134caec <+349>: cmp -0x38(%rbp),%r9d
0xffffffff8134caf0 <+353>: jne 0xffffffff8134cb27 <__udp4_lib_lookup+408>

345 return -1;
346 score += 2;
0xffffffff8134caf2 <+355>: add $0x2,%eax

347 }
348 if (inet->inet_dport) {
0xffffffff8134caf5 <+358>: mov 0x278(%rsi),%r9d
0xffffffff8134cafc <+365>: test %r9w,%r9w
0xffffffff8134cb00 <+369>: je 0xffffffff8134cb0c <__udp4_lib_lookup+381>

349 if (inet->inet_dport != sport)
0xffffffff8134cb02 <+371>: cmp -0x3e(%rbp),%r9w
0xffffffff8134cb07 <+376>: jne 0xffffffff8134cb27 <__udp4_lib_lookup+408>

351 score += 2;
0xffffffff8134cb09 <+378>: add $0x2,%eax

352 }
353 if (sk->sk_bound_dev_if) {
0xffffffff8134cb0c <+381>: mov 0x10(%rsi),%r9d
0xffffffff8134cb10 <+385>: test %r9d,%r9d
0xffffffff8134cb13 <+388>: je 0xffffffff8134cb1e <__udp4_lib_lookup+399>

354 if (sk->sk_bound_dev_if != dif)
0xffffffff8134cb15 <+390>: cmp -0x3c(%rbp),%r9d
0xffffffff8134cb19 <+394>: jne 0xffffffff8134cb27 <__udp4_lib_lookup+408>

355 return -1;
356 score += 2;
0xffffffff8134cb1b <+396>: add $0x2,%eax

488 score = compute_score(sk, net, saddr, hnum, sport,
489 daddr, dport, dif);
490 if (score > badness) {
0xffffffff8134cb1e <+399>: cmp %edx,%eax
0xffffffff8134cb20 <+401>: jle 0xffffffff8134cb27 <__udp4_lib_lookup+408>
0xffffffff8134cb22 <+403>: mov %eax,%edx

491 result = sk;
0xffffffff8134cb24 <+405>: mov %rsi,%rdi

487 sk_nulls_for_each_rcu(sk, node, &hslot->head) {
0xffffffff8134cb27 <+408>: mov (%rcx),%rcx
0xffffffff8134cb2a <+411>: jmp 0xffffffff8134cb34 <__udp4_lib_lookup+421>
0xffffffff8134cb2c <+413>: mov %r8,%rcx <==
^~~~~~~~ Here, the value of r8 is copied to rcx on every restart of the loop,
which means that the original hslot->head is cached in r8.
It should re-read (%rbx) to get the current value of hslot->head, is that right?

486 badness = -1;
0xffffffff8134cb2f <+416>: or $0xffffffff,%edx

485 result = NULL;
0xffffffff8134cb32 <+419>: xor %edi,%edi

487 sk_nulls_for_each_rcu(sk, node, &hslot->head) {
0xffffffff8134cb34 <+421>: test $0x1,%cl
0xffffffff8134cb37 <+424>: jne 0xffffffff8134cb48 <__udp4_lib_lookup+441>
0xffffffff8134cb39 <+426>: lea -0x38(%rcx),%rsi

333 if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
0xffffffff8134cb3d <+430>: cmp %r12,-0x8(%rcx)
0xffffffff8134cb41 <+434>: jne 0xffffffff8134cb27 <__udp4_lib_lookup+408>
0xffffffff8134cb43 <+436>: jmpq 0xffffffff8134caab <__udp4_lib_lookup+284>

include/linux/list_nulls.h:
46 return ((unsigned long)ptr) >> 1;
0xffffffff8134cb48 <+441>: shr %rcx

/linux-3.2/net/ipv4/udp.c:
500 if (get_nulls_value(node) != slot)
0xffffffff8134cb4b <+444>: cmp -0x48(%rbp),%rcx
0xffffffff8134cb4f <+448>: jne 0xffffffff8134cb2c <__udp4_lib_lookup+413> => jumps back to +413, but that is not the same as "goto begin" (hslot->head is not reloaded).

501 goto begin;
502
503 if (result) {
0xffffffff8134cb51 <+450>: test %rdi,%rdi
0xffffffff8134cb54 <+453>: je 0xffffffff8134cc0f <__udp4_lib_lookup+640>
0xffffffff8134cb5a <+459>: mov $0x2,%ecx
0xffffffff8134cb5f <+464>: jmp 0xffffffff8134cb63 <__udp4_lib_lookup+468>

include/linux/atomic.h:
55 } while (c);
0xffffffff8134cb61 <+466>: mov %eax,%ecx

51 val = atomic_cmpxchg(v, c, c + 1);
0xffffffff8134cb63 <+468>: lea 0x1(%rcx),%esi

/linux-3.2/arch/x86/include/asm/atomic.h:
211 return cmpxchg(&v->counter, old, new);
0xffffffff8134cb66 <+471>: mov %ecx,%eax
0xffffffff8134cb68 <+473>: lock cmpxchg %esi,0x4c(%rdi)

include/linux/atomic.h:
52 if (val == c)
0xffffffff8134cb6d <+478>: cmp %ecx,%eax
0xffffffff8134cb6f <+480>: je 0xffffffff8134cc21 <__udp4_lib_lookup+658>

53 return 1;
54 c = val;
55 } while (c);
0xffffffff8134cb75 <+486>: test %eax,%eax
0xffffffff8134cb77 <+488>: jne 0xffffffff8134cb61 <__udp4_lib_lookup+466>

/linux-3.2/net/ipv4/udp.c:
505 result = NULL;
0xffffffff8134cb79 <+490>: xor %edi,%edi
0xffffffff8134cb7b <+492>: jmpq 0xffffffff8134cc0f <__udp4_lib_lookup+640>

331 int score = -1;
0xffffffff8134cb80 <+497>: or $0xffffffff,%eax

332
333 if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
0xffffffff8134cb83 <+500>: cmp %r13w,0x8(%rdi)
0xffffffff8134cb88 <+505>: jne 0xffffffff8134cbf2 <__udp4_lib_lookup+611>

334 !ipv6_only_sock(sk)) {
0xffffffff8134cb8a <+507>: mov 0xc(%rdi),%ecx

333 if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
0xffffffff8134cb8d <+510>: cmp $0xa,%cx
0xffffffff8134cb91 <+514>: jne 0xffffffff8134cba0 <__udp4_lib_lookup+529>

334 !ipv6_only_sock(sk)) {
0xffffffff8134cb93 <+516>: mov 0x270(%rdi),%rsi
0xffffffff8134cb9a <+523>: testb $0x10,0x6a(%rsi)
0xffffffff8134cb9e <+527>: jne 0xffffffff8134cbf2 <__udp4_lib_lookup+611>

335 struct inet_sock *inet = inet_sk(sk);
336
337 score = (sk->sk_family == PF_INET ? 1 : 0);
0xffffffff8134cba0 <+529>: xor %eax,%eax
0xffffffff8134cba2 <+531>: cmp $0x2,%cx

338 if (inet->inet_rcv_saddr) {
0xffffffff8134cba6 <+535>: mov 0x4(%rdi),%ecx

337 score = (sk->sk_family == PF_INET ? 1 : 0);
0xffffffff8134cba9 <+538>: sete %al

338 if (inet->inet_rcv_saddr) {
0xffffffff8134cbac <+541>: test %ecx,%ecx
0xffffffff8134cbae <+543>: je 0xffffffff8134cbb8 <__udp4_lib_lookup+553>

339 if (inet->inet_rcv_saddr != daddr)
0xffffffff8134cbb0 <+545>: cmp %r15d,%ecx
0xffffffff8134cbb3 <+548>: jne 0xffffffff8134cbef <__udp4_lib_lookup+608>

340 return -1;
341 score += 2;
0xffffffff8134cbb5 <+550>: add $0x2,%eax

342 }
343 if (inet->inet_daddr) {
0xffffffff8134cbb8 <+553>: mov (%rdi),%ecx
0xffffffff8134cbba <+555>: test %ecx,%ecx
0xffffffff8134cbbc <+557>: je 0xffffffff8134cbc6 <__udp4_lib_lookup+567>

344 if (inet->inet_daddr != saddr)
0xffffffff8134cbbe <+559>: cmp -0x38(%rbp),%ecx
0xffffffff8134cbc1 <+562>: jne 0xffffffff8134cbef <__udp4_lib_lookup+608>

345 return -1;
346 score += 2;
0xffffffff8134cbc3 <+564>: add $0x2,%eax

347 }
348 if (inet->inet_dport) {
0xffffffff8134cbc6 <+567>: mov 0x278(%rdi),%ecx
0xffffffff8134cbcc <+573>: test %cx,%cx
0xffffffff8134cbcf <+576>: je 0xffffffff8134cbda <__udp4_lib_lookup+587>

349 if (inet->inet_dport != sport)
0xffffffff8134cbd1 <+578>: cmp -0x3e(%rbp),%cx
0xffffffff8134cbd5 <+582>: jne 0xffffffff8134cbef <__udp4_lib_lookup+608>

351 score += 2;
0xffffffff8134cbd7 <+584>: add $0x2,%eax

352 }
353 if (sk->sk_bound_dev_if) {
0xffffffff8134cbda <+587>: mov 0x10(%rdi),%ecx
0xffffffff8134cbdd <+590>: test %ecx,%ecx
0xffffffff8134cbdf <+592>: je 0xffffffff8134cbf2 <__udp4_lib_lookup+611>

355 return -1;
356 score += 2;
0xffffffff8134cbe1 <+594>: lea 0x2(%rax),%esi
0xffffffff8134cbe4 <+597>: or $0xffffffff,%eax
0xffffffff8134cbe7 <+600>: cmp -0x3c(%rbp),%ecx
0xffffffff8134cbea <+603>: cmove %esi,%eax
0xffffffff8134cbed <+606>: jmp 0xffffffff8134cbf2 <__udp4_lib_lookup+611>

350 return -1;
0xffffffff8134cbef <+608>: or $0xffffffff,%eax

506 else if (unlikely(compute_score(result, net, saddr, hnum, sport,
0xffffffff8134cbf2 <+611>: cmp %edx,%eax
0xffffffff8134cbf4 <+613>: jge 0xffffffff8134cc0f <__udp4_lib_lookup+640>

/linux-3.2/arch/x86/include/asm/atomic.h:
123 asm volatile(LOCK_PREFIX "decl %0; sete %1"
0xffffffff8134cbf6 <+615>: lock decl 0x4c(%rdi)
0xffffffff8134cbfa <+619>: sete %al

include/net/sock.h:
1257 if (atomic_dec_and_test(&sk->sk_refcnt))
0xffffffff8134cbfd <+622>: test %al,%al
0xffffffff8134cbff <+624>: je 0xffffffff8134caa3 <__udp4_lib_lookup+276>

1258 sk_free(sk);
0xffffffff8134cc05 <+630>: callq 0xffffffff812e873f <sk_free>
0xffffffff8134cc0a <+635>: jmpq 0xffffffff8134caa3 <__udp4_lib_lookup+276>

/linux-3.2/net/ipv4/udp.c:
514 }
0xffffffff8134cc0f <+640>: add $0x48,%rsp
0xffffffff8134cc13 <+644>: mov %rdi,%rax
0xffffffff8134cc16 <+647>: pop %rbx
0xffffffff8134cc17 <+648>: pop %r12
0xffffffff8134cc19 <+650>: pop %r13
0xffffffff8134cc1b <+652>: pop %r14
0xffffffff8134cc1d <+654>: pop %r15
0xffffffff8134cc1f <+656>: leaveq
0xffffffff8134cc20 <+657>: retq

333 if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&
0xffffffff8134cc21 <+658>: cmp %r12,0x30(%rdi)
0xffffffff8134cc25 <+662>: jne 0xffffffff8134cbef <__udp4_lib_lookup+608>
0xffffffff8134cc27 <+664>: jmpq 0xffffffff8134cb80 <__udp4_lib_lookup+497>
End of assembler dump.


The value of r8 is copied to rcx on every restart of the loop, which means
that the original hslot->head is cached in r8. The code should re-read (%rbx)
to get the current value of hslot->head, is that right?

I would greatly appreciate it if you could give me some feedback.

Best regards,
Jason Cai