Re: fsck intermittent failure

tytso@mit.edu
Thu, 21 Oct 1999 13:48:49 -0400


Date: Thu, 21 Oct 1999 01:25:45 -0400
From: ronis <ronis@cloud9.net>

Running rh 6.1 with kernel version 2.12-20.
On booting, get message: fsck.ext2 for device /dev/hdb6 exited with
signal 10.
You drop down into shell and then reboot.
On subsequent boot fsck for /dev/hdb6 WORKS, no message.
On booting a third time, get message: fsck.ext2 for device /dev/hdb6
exited with signal 10.
This flip flops every time.

This is a known bug; I will be releasing e2fsprogs 1.16 shortly to fix
this (and other issues). My apologies for the inconvenience. In the
meantime, here's a patch which fixes the problem, if you're willing to
recompile e2fsprogs....

- Ted

Index: fsck.c
===================================================================
RCS file: /usr/src/CVS-REPO/e2fsprogs/misc/fsck.c,v
retrieving revision 1.19
retrieving revision 1.18
diff -u -r1.19 -r1.18
--- fsck.c 1999/10/20 18:11:01 1.19
+++ fsck.c 1999/07/19 15:30:18 1.18
@@ -33,7 +33,6 @@
#include <limits.h>
#include <stdio.h>
#include <string.h>
-#include <time.h>
#if HAVE_STDLIB_H
#include <stdlib.h>
#endif
@@ -378,7 +377,7 @@
{
char *s, *argv[80], prog[80];
int argc, i;
- struct fsck_instance *inst, *p;
+ struct fsck_instance *inst;
pid_t pid;

inst = malloc(sizeof(struct fsck_instance));
@@ -434,18 +433,8 @@
inst->prog = string_copy(prog);
inst->type = string_copy(type);
inst->device = string_copy(device);
- inst->start_time = time(0);
- inst->next = NULL;
-
- /*
- * Find the end of the list, so we add the instance on at the end.
- */
- for (p = instance_list; p && p->next; p = p->next);
-
- if (p)
- p->next = instance_list;
- else
- instance_list = inst;
+ inst->next = instance_list;
+ instance_list = inst;

return 0;
}
@@ -471,28 +460,31 @@
return(inst);
}

- do {
- pid = wait(&status);
- if (pid < 0) {
- if ((errno == EINTR) || (errno == EAGAIN))
- continue;
- if (errno == ECHILD) {
- fprintf(stderr,
- "%s: wait: No more child process?!?\n",
- progname);
- return NULL;
- }
- perror("wait");
- continue;
+retry:
+ pid = wait(&status);
+ if (pid < 0) {
+ if ((errno == EINTR) || (errno == EAGAIN))
+ goto retry;
+ if (errno == ECHILD) {
+ fprintf(stderr,
+ "%s: wait: No more child process?!?\n",
+ progname);
+ return NULL;
}
- for (prev = 0, inst = instance_list;
- inst;
- prev = inst, inst = inst->next) {
- if (inst->pid == pid)
- break;
- }
- } while (!inst);
-
+ perror("wait");
+ goto retry;
+ }
+ for (prev = 0, inst = instance_list;
+ inst;
+ prev = inst, inst = inst->next) {
+ if (inst->pid == pid)
+ break;
+ }
+ if (!inst) {
+ printf("Unexpected child process %d, status = 0x%x\n",
+ pid, status);
+ goto retry;
+ }
if (WIFEXITED(status))
status = WEXITSTATUS(status);
else if (WIFSIGNALED(status)) {
@@ -522,19 +514,7 @@
continue;
if (strcmp(inst2->type, "ext2"))
continue;
- /*
- * If we've just started the fsck, wait a tiny
- * bit before sending the kill, to give it
- * time to set up the signal handler
- */
- if (inst2->start_time < time(0)+2) {
- if (fork() == 0) {
- sleep(1);
- kill(inst2->pid, SIGUSR1);
- exit(0);
- }
- } else
- kill(inst2->pid, SIGUSR1);
+ kill(inst2->pid, SIGUSR1);
break;
}
}
Index: fsck.h
===================================================================
RCS file: /usr/src/CVS-REPO/e2fsprogs/misc/fsck.h,v
retrieving revision 1.4
retrieving revision 1.3
diff -u -r1.4 -r1.3
--- fsck.h 1999/10/20 18:11:01 1.4
+++ fsck.h 1999/07/19 15:30:19 1.3
@@ -49,7 +49,6 @@
int pid;
int flags;
int exit_status;
- time_t start_time;
char * prog;
char * type;
char * device;

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/