[ 2.0.31-PRE10 VM BUG ]

Dan Hollis (goemon@sasami.anime.net)
Mon, 13 Oct 1997 21:53:23 -0700 (PDT)


Kernel 2.0.31-pre10 seems to have a very serious problem with the VM:

When the system runs out of physical RAM, it locks up even if there are
hundreds of megabytes of swap space available. This happens a lot on our
news server during expires.

Doing right-shift+scroll-lock reveals the kernel looping at these
addresses:

111333
1113e0
111485
111801
124777
124810
125e4c
125e8a
125e8f

(gdb) l *0x111333
0x111333 is in schedule (sched.c:450).
445 */
446 /* this is the scheduler proper: */
447 c = -1000;
448 next = idle_task;
449 while (p != &init_task) {
450 int weight = goodness(p, prev, this_cpu);
451 if (weight > c)
452 c = weight, next = p;
453 p = p->next_run;
454 }

(gdb) l *0x1113e0
0x1113e0 is in schedule (sched.c:488).
483 timer.data = (unsigned long) prev;
484 timer.function = process_timeout;
485 add_timer(&timer);
486 }
487 get_mmu_context(next);
488 switch_to(prev,next);
489 if (timeout)
490 del_timer(&timer);
491 }
492 return;

(gdb) l *0x111485
0x111485 is in wake_up (sched.c:533).
528 return;
529 head = WAIT_QUEUE_HEAD(q);
530 while (next != head) {
531 struct task_struct *p = next->task;
532 next = next->next;
533 if (p != NULL) {
534 if ((p->state == TASK_UNINTERRUPTIBLE) ||
535 (p->state == TASK_INTERRUPTIBLE))
536 wake_up_process(p);
537 }

(gdb) l *0x111801
0x111801 is in sleep_on (sched.c:733).
728 }
729
730 void sleep_on(struct wait_queue **p)
731 {
732 __sleep_on(p,TASK_UNINTERRUPTIBLE);
733 }
734
735 #define TVN_BITS 6
736 #define TVR_BITS 8
737 #define TVN_SIZE (1 << TVN_BITS)

(gdb) l *0x124777
0x124777 is in find_candidate (buffer.c:572).
567 for (; (*list_len) > 0; bh = bh->b_next_free, (*list_len)--) {
568 if (size != bh->b_size) {
569 /* this provides a mechanism for freeing blocks
570 of other sizes, this is necessary now that we
571 no longer have the lav code. */
572 try_to_free_buffer(bh,&bh,1);
573 if (!bh)
574 break;
575 lookahead = 7;
576 continue;

(gdb) l *0x124810
0x124810 is in find_candidate (buffer.c:567).
562 int lookahead = 7;
563
564 if (!bh)
565 goto no_candidate;
566
567 for (; (*list_len) > 0; bh = bh->b_next_free, (*list_len)--) {
568 if (size != bh->b_size) {
569 /* this provides a mechanism for freeing blocks
570 of other sizes, this is necessary now that we
571 no longer have the lav code. */

(gdb) l *0x125e4c
0x125e4c is in try_to_free_buffer (buffer.c:1403).
1398 page = (unsigned long) bh->b_data;
1399 page &= PAGE_MASK;
1400 tmp = bh;
1401 do {
1402 if (!tmp)
1403 return 0;
1404 if (tmp->b_count || buffer_protected(tmp) ||
1405 buffer_dirty(tmp) || buffer_locked(tmp) ||
1406 buffer_waiting(tmp))
1407 return 0;

(gdb) l *0x125e8a
0x125e8a is in try_to_free_buffer (buffer.c:1408).
1403 return 0;
1404 if (tmp->b_count || buffer_protected(tmp) ||
1405 buffer_dirty(tmp) || buffer_locked(tmp) ||
1406 buffer_waiting(tmp))
1407 return 0;
1408 if (priority && buffer_touched(tmp))
1409 return 0;
1410 tmp = tmp->b_this_page;
1411 } while (tmp != bh);
1412 tmp = bh;

-Dan