Re: [PATCH 0/3] enhanced ESTALE error handling

From: J. Bruce Fields
Date: Fri Jan 18 2008 - 10:47:04 EST


On Fri, Jan 18, 2008 at 10:35:50AM -0500, Peter Staubach wrote:
> Hi.
>
> Here is a patch set which modifies the system to enhance the
> ESTALE error handling for system calls which take pathnames
> as arguments.

I think your cover letter may be bigger than any of the actual
patches.... I'm not complaining! But would it be worth adding this
explanation and test code to Documentation/filesystems/ just to keep it
around?

--b.

>
> The error, ESTALE, was originally introduced to handle the
> situation where a file handle, which NFS uses to uniquely
> identify a file on the server, no longer refers to a valid file
> on the server. This can happen when the file is removed on the
> server, either by an application on the server, some other
> client accessing the server, or sometimes even by another
> mounted file system from the same client. It can also happen
> when the file resides upon a file system which is no longer
> exported.
>
> The error, ESTALE, is usually seen when cached directory
> information is used to convert a pathname to a dentry/inode pair.
> The information is discovered to be out of date or stale when a
> subsequent operation is sent to the NFS server. This can easily
> happen in system calls such as stat(2) when the pathname is
> converted to a dentry/inode pair using cached information, but then
> a subsequent GETATTR call to the server discovers that the file
> handle is no longer valid.
>
> System calls which take pathnames as arguments should never see
> ESTALE errors from situations like this. These system calls
> should either fail with an ENOENT error if the pathname cannot
> be successfully translated to a dentry/inode pair or succeed
> or fail based on their own semantics.
>
> ESTALE errors which occur during the lookup process can be
> handled by dropping the dentry which refers to the non-existent
> file from the dcache and then restarting the lookup process.
> Care can be taken to ensure that forward progress is always
> being made in order to avoid infinite loops.
>
> ESTALE errors which occur during operations subsequent to the
> lookup process can be handled by unwinding appropriately and
> then performing the lookup process again. Eventually, either
> the lookup process will succeed or fail correctly or the
> subsequent operation will succeed or fail on its own merits.
>
> This support is desired in order to tighten up recovery from
> discovering stale resources due to the loose cache consistency
> semantics that file systems such as NFS employ. In particular,
> there are several large Red Hat customers, converting from
> Solaris to Linux, who desire this support in order that their
> application environments continue to work.
>
> Please note that system calls which do not take pathnames as
> arguments or perhaps use file descriptors to identify the
> file to be manipulated may still fail with ESTALE errors.
> There is no recovery possible with these system calls like
> there is with system calls which take pathnames as arguments.
>
> This support was tested using the attached programs and
> running multiple copies on mounted file systems which do not
> share superblocks. When two or more copies of this program
> are running, many ESTALE errors can be seen over the network.
>
> Comments?
>
> Thanx...
>
> ps

> #
> #define _XOPEN_SOURCE 500
> #define _LARGEFILE64_SOURCE
> #include <sys/types.h>
> #include <sys/stat.h>
> #include <sys/statfs.h>
> #include <sys/inotify.h>
> #include <sys/time.h>
> #include <sys/wait.h>
> #include <sys/xattr.h>
> #include <errno.h>
> #include <fcntl.h>
> #include <signal.h>
> #include <stdio.h>
> #include <stdlib.h>
> #include <unistd.h>
> #include <utime.h>
>
> /*
>  * Forward declarations of the individual test routines.  Each takes
>  * no arguments and is referenced from the tests[] table.
>  */
> void mkdir_test(void);
> void link_test(void);
> void open_test(void);
> void access_test(void);
> void chmod_test(void);
> void chown_test(void);
> void readlink_test(void);
> void utimes_test(void);
> void chdir_test(void);
> void chroot_test(void);
> void rename_test(void);
> void exec_test(void);
> void mknod_test(void);
> void statfs_test(void);
> void truncate_test(void);
> void xattr_test(void);
> void inotify_test(void);
>
> /* One table slot: the routine that a forked child runs in a loop. */
> struct tests {
> 	void (*test)(void);
> };
>
> /*
>  * Every test routine, one per slot.  main() forks one child per
>  * entry and test_pids[] is sized from this array.
>  */
> struct tests tests[] = {
> 	{ mkdir_test },
> 	{ link_test },
> 	{ open_test },
> 	{ access_test },
> 	{ chmod_test },
> 	{ chown_test },
> 	{ readlink_test },
> 	{ utimes_test },
> 	{ chdir_test },
> 	{ chroot_test },
> 	{ rename_test },
> 	{ exec_test },
> 	{ mknod_test },
> 	{ statfs_test },
> 	{ truncate_test },
> 	{ xattr_test },
> 	{ inotify_test }
> };
>
> /* Result of fork() for each tests[] entry, filled in by main(). */
> pid_t test_pids[sizeof(tests) / sizeof(tests[0])];
>
> /* Pid recorded by main(); check_error() sends it SIGINT on ESTALE. */
> pid_t parent_pid;
>
> /* SIGINT handler installed by main(). */
> void kill_tests(int);
>
> /*
>  * Fork one child per entry in tests[]; each child calls its test
>  * routine in an endless loop.  The parent then sleeps in pause()
>  * until SIGINT arrives (from the user, or from a child that saw
>  * ESTALE), at which point kill_tests() terminates the children and
>  * exits.
>  *
>  * SIGINT is held across the fork loop, presumably so that
>  * kill_tests() cannot run against a partially filled test_pids[].
>  *
>  * Returns 0 if pause() ever returns without the handler exiting.
>  */
> int
> main(int argc, char *argv[])
> {
> 	size_t i;
>
> 	(void) argc;
> 	(void) argv;
>
> 	parent_pid = getpid();
>
> 	sigset(SIGINT, kill_tests);
>
> 	sighold(SIGINT);
>
> 	for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
> 		test_pids[i] = fork();
> 		if (test_pids[i] < 0) {
> 			/*
> 			 * Report the failure instead of silently running
> 			 * fewer tests; kill_tests() skips -1 entries.
> 			 */
> 			perror("fork");
> 			continue;
> 		}
> 		if (test_pids[i] == 0) {
> 			for (;;)
> 				(*tests[i].test)();
> 			/* NOTREACHED */
> 		}
> 	}
>
> 	sigrelse(SIGINT);
>
> 	pause();
>
> 	return 0;
> }
>
> /*
>  * SIGINT handler: send SIGTERM to every child recorded in
>  * test_pids[] (entries equal to -1 are skipped), reporting any
>  * kill() failure with perror(), then exit with status 0.
>  */
> void
> kill_tests(int sig)
> {
> 	int idx = 0;
>
> 	while (idx < sizeof(tests) / sizeof(tests[0])) {
> 		pid_t child = test_pids[idx++];
>
> 		if (child == -1)
> 			continue;
> 		if (kill(child, SIGTERM) < 0)
> 			perror("kill");
> 	}
>
> 	exit(0);
> }
>
> /*
>  * Examine the result of a system call.  When it failed (negative
>  * return) with errno == ESTALE, print the operation name via
>  * perror(), send SIGINT to parent_pid and block in pause().
>  * Every other outcome is ignored.
>  */
> void
> check_error(int error, char *operation)
> {
> 	if (error >= 0 || errno != ESTALE)
> 		return;
>
> 	perror(operation);
> 	kill(parent_pid, SIGINT);
> 	pause();
> }
>
> /*
>  * Variant of check_error() used inside a test's own forked child:
>  * on a negative return with errno == ESTALE it reports the
>  * operation, sends SIGINT to parent_pid and exits with status 1
>  * instead of pausing.  Every other outcome is ignored.
>  */
> void
> check_error_child(int error, char *operation)
> {
> 	if (error >= 0 || errno != ESTALE)
> 		return;
>
> 	perror(operation);
> 	kill(parent_pid, SIGINT);
> 	exit(1);
> }
>
> /*
>  * Call stat(), stat64(), lstat() and lstat64() on file, in that
>  * order, handing each return value to check_error().
>  */
> void
> do_stats(char *file)
> {
> 	struct stat sb;
> 	struct stat64 sb64;
>
> 	check_error(stat(file, &sb), "stat");
> 	check_error(stat64(file, &sb64), "stat64");
> 	check_error(lstat(file, &sb), "lstat");
> 	check_error(lstat64(file, &sb64), "lstat64");
> }
>
> /*
>  * Same sequence as do_stats() -- stat(), stat64(), lstat(),
>  * lstat64() -- but each return value goes to check_error_child().
>  */
> void
> do_stats_child(char *file)
> {
> 	struct stat sb;
> 	struct stat64 sb64;
>
> 	check_error_child(stat(file, &sb), "stat");
> 	check_error_child(stat64(file, &sb64), "stat64");
> 	check_error_child(lstat(file, &sb), "lstat");
> 	check_error_child(lstat64(file, &sb64), "lstat64");
> }
>
> /*
>  * Nested directory paths below "mkdir", each entry one level deeper
>  * than the previous one (a through z).  The list is NULL-terminated;
>  * mkdir_test() creates the entries in order and removes them in
>  * reverse.
>  */
> char *mkdir_dirs[] = {
> "mkdir/a",
> "mkdir/a/b",
> "mkdir/a/b/c",
> "mkdir/a/b/c/d",
> "mkdir/a/b/c/d/e",
> "mkdir/a/b/c/d/e/f",
> "mkdir/a/b/c/d/e/f/g",
> "mkdir/a/b/c/d/e/f/g/h",
> "mkdir/a/b/c/d/e/f/g/h/i",
> "mkdir/a/b/c/d/e/f/g/h/i/j",
> "mkdir/a/b/c/d/e/f/g/h/i/j/k",
> "mkdir/a/b/c/d/e/f/g/h/i/j/k/l",
> "mkdir/a/b/c/d/e/f/g/h/i/j/k/l/m",
> "mkdir/a/b/c/d/e/f/g/h/i/j/k/l/m/n",
> "mkdir/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o",
> "mkdir/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p",
> "mkdir/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q",
> "mkdir/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r",
> "mkdir/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s",
> "mkdir/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t",
> "mkdir/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u",
> "mkdir/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v",
> "mkdir/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w",
> "mkdir/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x",
> "mkdir/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y",
> "mkdir/a/b/c/d/e/f/g/h/i/j/k/l/m/n/o/p/q/r/s/t/u/v/w/x/y/z",
> NULL
> };
>
> /*
>  * Create "mkdir" and then every path in mkdir_dirs[], stat'ing each
>  * one right after it is created.  Afterwards walk the list backwards,
>  * stat'ing and removing each directory, and finally remove "mkdir".
>  * Every result is passed to check_error().
>  */
> void
> mkdir_test()
> {
> 	int depth;
> 	int rc;
>
> 	rc = mkdir("mkdir", 0755);
> 	check_error(rc, "mkdir");
>
> 	depth = 0;
> 	while (mkdir_dirs[depth] != NULL) {
> 		rc = mkdir(mkdir_dirs[depth], 0755);
> 		check_error(rc, "mkdir");
> 		do_stats(mkdir_dirs[depth]);
> 		depth++;
> 	}
>
> 	/* depth is one past the deepest entry; unwind deepest-first. */
> 	for (depth--; depth >= 0; depth--) {
> 		do_stats(mkdir_dirs[depth]);
> 		rc = rmdir(mkdir_dirs[depth]);
> 		check_error(rc, "rmdir");
> 	}
>
> 	rc = rmdir("mkdir");
> 	check_error(rc, "rmdir");
> }
>
> char *link_file_a = "link/a";
> char *link_file_b = "link/b";
>
> /* Stat link_file_a and then link_file_b. */
> static void
> link_stat_both(void)
> {
> 	do_stats(link_file_a);
> 	do_stats(link_file_b);
> }
>
> /*
>  * Exercise link()/unlink(): create "link/a", hard-link it to
>  * "link/b", drop "a", link "b" back to "a", drop "b", drop "a",
>  * stat'ing both names after every step, then remove the directory.
>  * Every result is passed to check_error().
>  */
> void
> link_test()
> {
> 	int rc;
> 	int newfd;
>
> 	rc = mkdir("link", 0755);
> 	check_error(rc, "mkdir");
>
> 	newfd = open(link_file_a, O_CREAT, 0644);
> 	check_error(newfd, "open");
>
> 	(void) close(newfd);
>
> 	do_stats(link_file_a);
>
> 	rc = link(link_file_a, link_file_b);
> 	check_error(rc, "link");
> 	link_stat_both();
>
> 	rc = unlink(link_file_a);
> 	check_error(rc, "unlink");
> 	link_stat_both();
>
> 	rc = link(link_file_b, link_file_a);
> 	check_error(rc, "link");
> 	link_stat_both();
>
> 	rc = unlink(link_file_b);
> 	check_error(rc, "unlink");
> 	link_stat_both();
>
> 	rc = unlink(link_file_a);
> 	check_error(rc, "unlink");
> 	link_stat_both();
>
> 	rc = rmdir("link");
> 	check_error(rc, "rmdir");
> }
>
> char *open_file = "open/a";
>
> /*
>  * Exercise open(): create "open/a" with O_CREAT | O_RDWR, reopen it
>  * with O_RDWR alone, stat'ing the file after each open, then unlink
>  * it and remove the directory.  Every result is passed to
>  * check_error().
>  */
> void
> open_test()
> {
> 	int handle;
>
> 	check_error(mkdir("open", 0755), "mkdir");
>
> 	handle = open(open_file, O_CREAT | O_RDWR, 0644);
> 	check_error(handle, "open: O_CREAT");
> 	(void) close(handle);
>
> 	do_stats(open_file);
>
> 	handle = open(open_file, O_RDWR);
> 	check_error(handle, "open: O_RDWR");
> 	(void) close(handle);
>
> 	do_stats(open_file);
>
> 	check_error(unlink(open_file), "unlink");
> 	check_error(rmdir("open"), "rmdir");
> }
>
> char *access_file = "access/a";
>
> /*
>  * Exercise access(): create "access/a", probe it with F_OK
>  * (stat'ing before and after), then unlink it and remove the
>  * directory.  Every result is passed to check_error().
>  */
> void
> access_test()
> {
> 	int handle;
>
> 	check_error(mkdir("access", 0755), "mkdir");
>
> 	handle = open(access_file, O_CREAT | O_RDWR, 0644);
> 	check_error(handle, "open: O_CREAT");
> 	(void) close(handle);
>
> 	do_stats(access_file);
>
> 	check_error(access(access_file, F_OK), "access");
>
> 	do_stats(access_file);
>
> 	check_error(unlink(access_file), "unlink");
> 	check_error(rmdir("access"), "rmdir");
> }
>
> char *chmod_file = "chmod/a";
>
> /*
>  * Exercise chmod(): create "chmod/a", change its mode to 0600
>  * (stat'ing before and after), then unlink it and remove the
>  * directory.  Every result is passed to check_error().
>  */
> void
> chmod_test()
> {
> 	int handle;
>
> 	check_error(mkdir("chmod", 0755), "mkdir");
>
> 	handle = open(chmod_file, O_CREAT | O_RDWR, 0644);
> 	check_error(handle, "open: O_CREAT");
> 	(void) close(handle);
>
> 	do_stats(chmod_file);
>
> 	check_error(chmod(chmod_file, 0600), "chmod");
>
> 	do_stats(chmod_file);
>
> 	check_error(unlink(chmod_file), "unlink");
> 	check_error(rmdir("chmod"), "rmdir");
> }
>
> char *chown_file = "chown/a";
>
> /*
>  * Exercise chown() and lchown(): create "chown/a", chown it to
>  * 4597:4597, lchown it to 4596:4596, stat'ing around each call,
>  * then unlink it and remove the directory.  Every result is passed
>  * to check_error().
>  */
> void
> chown_test()
> {
> 	int handle;
>
> 	check_error(mkdir("chown", 0755), "mkdir");
>
> 	handle = open(chown_file, O_CREAT | O_RDWR, 0644);
> 	check_error(handle, "open: O_CREAT");
> 	(void) close(handle);
>
> 	do_stats(chown_file);
>
> 	check_error(chown(chown_file, 4597, 4597), "chown");
>
> 	do_stats(chown_file);
>
> 	check_error(lchown(chown_file, 4596, 4596), "lchown");
>
> 	do_stats(chown_file);
>
> 	check_error(unlink(chown_file), "unlink");
> 	check_error(rmdir("chown"), "rmdir");
> }
>
> char *readlink_file = "readlink/a";
>
> /*
>  * Exercise symlink() and readlink(): create "readlink/a" as a
>  * symbolic link to "b", read it back (stat'ing before and after),
>  * then unlink it and remove the directory.  Every result is passed
>  * to check_error(); the link contents themselves are not examined.
>  */
> void
> readlink_test()
> {
> 	int rc;
> 	char target[BUFSIZ];
>
> 	rc = mkdir("readlink", 0755);
> 	check_error(rc, "mkdir");
>
> 	rc = symlink("b", readlink_file);
> 	check_error(rc, "symlink");
>
> 	do_stats(readlink_file);
>
> 	rc = readlink(readlink_file, target, sizeof(target));
> 	check_error(rc, "readlink");
>
> 	do_stats(readlink_file);
>
> 	rc = unlink(readlink_file);
> 	check_error(rc, "unlink");
>
> 	rc = rmdir("readlink");
> 	check_error(rc, "rmdir");
> }
>
> char *utimes_file = "utimes/a";
>
> /*
>  * Exercise utime() and utimes(): create "utimes/a", call each with
>  * a NULL times argument, stat'ing around each call, then unlink the
>  * file and remove the directory.  Every result is passed to
>  * check_error().
>  */
> void
> utimes_test()
> {
> 	int handle;
>
> 	check_error(mkdir("utimes", 0755), "mkdir");
>
> 	handle = open(utimes_file, O_CREAT | O_RDWR, 0644);
> 	check_error(handle, "open: O_CREAT");
> 	(void) close(handle);
>
> 	do_stats(utimes_file);
>
> 	check_error(utime(utimes_file, NULL), "utime");
>
> 	do_stats(utimes_file);
>
> 	check_error(utimes(utimes_file, NULL), "utimes");
>
> 	do_stats(utimes_file);
>
> 	check_error(unlink(utimes_file), "unlink");
> 	check_error(rmdir("utimes"), "rmdir");
> }
>
> char *chdir_dir = "chdir/dir";
>
> /*
>  * Exercise chdir(): create "chdir", then fork a child that creates
>  * "chdir/dir", changes into it and stats the same relative path
>  * before and after, reporting through check_error_child().  The
>  * parent waits for the child, stats the directory and removes both
>  * levels, reporting through check_error().
>  */
> void
> chdir_test()
> {
> 	int rc;
> 	int child;
> 	int wstatus;
>
> 	rc = mkdir("chdir", 0755);
> 	check_error(rc, "mkdir");
>
> 	child = fork();
> 	if (child == 0) {
> 		rc = mkdir(chdir_dir, 0755);
> 		check_error_child(rc, "mkdir");
>
> 		do_stats_child(chdir_dir);
>
> 		rc = chdir(chdir_dir);
> 		check_error_child(rc, "chdir");
>
> 		do_stats_child(chdir_dir);
>
> 		exit(0);
> 	}
>
> 	(void) wait(&wstatus);
>
> 	do_stats(chdir_dir);
>
> 	rc = rmdir(chdir_dir);
> 	check_error(rc, "rmdir");
>
> 	rc = rmdir("chdir");
> 	check_error(rc, "rmdir");
> }
>
> char *chroot_dir = "chroot/dir";
>
> /*
>  * Exercise chroot(): create "chroot", then fork a child that creates
>  * "chroot/dir", chroots into it and stats the same relative path
>  * before and after, reporting through check_error_child().  The
>  * parent waits for the child, stats the directory and removes both
>  * levels, reporting through check_error().
>  */
> void
> chroot_test()
> {
> 	int rc;
> 	int child;
> 	int wstatus;
>
> 	rc = mkdir("chroot", 0755);
> 	check_error(rc, "mkdir");
>
> 	child = fork();
> 	if (child == 0) {
> 		rc = mkdir(chroot_dir, 0755);
> 		check_error_child(rc, "mkdir");
>
> 		do_stats_child(chroot_dir);
>
> 		rc = chroot(chroot_dir);
> 		check_error_child(rc, "chroot");
>
> 		do_stats_child(chroot_dir);
>
> 		exit(0);
> 	}
>
> 	(void) wait(&wstatus);
>
> 	do_stats(chroot_dir);
>
> 	rc = rmdir(chroot_dir);
> 	check_error(rc, "rmdir");
>
> 	rc = rmdir("chroot");
> 	check_error(rc, "rmdir");
> }
>
> char *rename_file_a = "rename/a";
> char *rename_file_b = "rename/b";
>
> /*
>  * Exercise rename(): create "rename/a", rename it to "rename/b" and
>  * back again, stat'ing both names after each rename, then unlink
>  * "a" and remove the directory.  Every result is passed to
>  * check_error().
>  */
> void
> rename_test()
> {
> 	int handle;
>
> 	check_error(mkdir("rename", 0755), "mkdir");
>
> 	handle = open(rename_file_a, O_CREAT, 0644);
> 	check_error(handle, "open");
> 	(void) close(handle);
>
> 	do_stats(rename_file_a);
>
> 	check_error(rename(rename_file_a, rename_file_b), "rename");
>
> 	do_stats(rename_file_a);
> 	do_stats(rename_file_b);
>
> 	check_error(rename(rename_file_b, rename_file_a), "rename");
>
> 	do_stats(rename_file_a);
> 	do_stats(rename_file_b);
>
> 	check_error(unlink(rename_file_a), "unlink");
> 	check_error(rmdir("rename"), "rmdir");
> }
>
> char *exec_file = "exec/a";
> char *exec_source_file = "exec_test";
>
> /*
>  * Exercise exec: create "exec", hard-link the existing file
>  * "exec_test" (looked up relative to the current directory) to
>  * "exec/a", and fork a child that executes the link.  The parent
>  * waits for the child, stats the link, unlinks it and removes the
>  * directory.  Results go through check_error() in the parent and
>  * check_error_child() in the child.
>  */
> void
> exec_test()
> {
> 	int error;
> 	int pid;
> 	int status;
>
> 	error = mkdir("exec", 0755);
> 	check_error(error, "mkdir");
>
> 	error = link(exec_source_file, exec_file);
> 	check_error(error, "link");
> 	do_stats(exec_file);
>
> 	pid = fork();
> 	if (pid == 0) {
> 		/*
> 		 * The terminating argument of a variadic exec call must
> 		 * have pointer type; a bare NULL may expand to a plain
> 		 * integer 0.
> 		 */
> 		error = execl(exec_file, exec_file, (char *)NULL);
> 		check_error_child(error, "execl");
>
> 		/* Only reached when execl() failed. */
> 		exit(1);
> 	}
>
> 	wait(&status);
>
> 	do_stats(exec_file);
>
> 	error = unlink(exec_file);
> 	check_error(error, "unlink");
>
> 	error = rmdir("exec");
> 	check_error(error, "rmdir");
> }
>
> char *mknod_file = "mknod/a";
>
> /*
>  * Exercise mknod(): create "mknod/a" as a character special file
>  * (mode S_IFCHR | 0644, device 0), stat it, then unlink it and
>  * remove the directory.  Every result is passed to check_error().
>  */
> void
> mknod_test()
> {
> 	check_error(mkdir("mknod", 0755), "mkdir");
>
> 	check_error(mknod(mknod_file, S_IFCHR | 0644, 0), "mknod");
>
> 	do_stats(mknod_file);
>
> 	check_error(unlink(mknod_file), "unlink");
> 	check_error(rmdir("mknod"), "rmdir");
> }
>
> /*
>  * Exercise statfs() and statfs64(): create the directory "statfs",
>  * stat it, query it with both calls, then remove it.  Every result
>  * is passed to check_error().
>  */
> void
> statfs_test()
> {
> 	struct statfs fsinfo;
> 	struct statfs64 fsinfo64;
>
> 	check_error(mkdir("statfs", 0755), "mkdir");
>
> 	do_stats("statfs");
>
> 	check_error(statfs("statfs", &fsinfo), "statfs");
> 	check_error(statfs64("statfs", &fsinfo64), "statfs64");
>
> 	check_error(rmdir("statfs"), "rmdir");
> }
>
> char *truncate_file = "truncate/a";
>
> /*
>  * Exercise truncate(): create "truncate/a", set its length to 1024
>  * (stat'ing before and after), then unlink it and remove the
>  * directory.  Every result is passed to check_error().
>  */
> void
> truncate_test()
> {
> 	int handle;
>
> 	check_error(mkdir("truncate", 0755), "mkdir");
>
> 	handle = open(truncate_file, O_CREAT | O_RDWR, 0644);
> 	check_error(handle, "open: O_CREAT");
> 	(void) close(handle);
>
> 	do_stats(truncate_file);
>
> 	check_error(truncate(truncate_file, 1024), "truncate");
>
> 	do_stats(truncate_file);
>
> 	check_error(unlink(truncate_file), "unlink");
> 	check_error(rmdir("truncate"), "rmdir");
> }
>
> /* File that xattr_test() creates and manipulates. */
> char *xattr_file = "xattr/a";
>
> /*
>  * ACL entry tag values stored in e_tag by xattr_test().
>  * NOTE(review): presumably copied from the kernel's POSIX ACL
>  * definitions -- confirm against <linux/posix_acl.h>.
>  */
> #define ACL_USER_OBJ (0x01)
> #define ACL_USER (0x02)
> #define ACL_GROUP_OBJ (0x04)
> #define ACL_MASK (0x10)
> #define ACL_OTHER (0x20)
>
> /* One ACL entry: tag, permission bits and user/group id. */
> struct posix_acl_xattr_entry {
> unsigned short e_tag;
> unsigned short e_perm;
> unsigned int e_id;
> };
>
> /* Value xattr_test() stores in a_version. */
> #define POSIX_ACL_XATTR_VERSION 0x0002
>
> /*
>  * Buffer passed as the "system.posix_acl_access" attribute value:
>  * a version word followed by a fixed array of five entries.
>  */
> struct posix_acl_xattr_header {
> unsigned int a_version;
> struct posix_acl_xattr_entry a_entries[5];
> };
>
> /*
>  * Exercise the extended attribute calls on "xattr/a" using the
>  * "system.posix_acl_access" attribute: get/lget, set, lset,
>  * get/lget again, list/llist, remove, set again and lremove,
>  * stat'ing the file after each modifying call.  Finally unlink the
>  * file and remove the directory.  Every result is passed to
>  * check_error().
>  */
> void
> xattr_test()
> {
> 	static const unsigned short tags[5] = {
> 		ACL_USER_OBJ, ACL_USER, ACL_GROUP_OBJ, ACL_MASK, ACL_OTHER
> 	};
> 	const char *attr = "system.posix_acl_access";
> 	struct posix_acl_xattr_header acl;
> 	char scratch[1024];
> 	int handle;
> 	int slot;
> 	int rc;
>
> 	rc = mkdir("xattr", 0755);
> 	check_error(rc, "mkdir");
>
> 	handle = open(xattr_file, O_CREAT | O_RDWR, 0444);
> 	check_error(handle, "open: O_CREAT");
>
> 	(void) close(handle);
>
> 	do_stats(xattr_file);
>
> 	rc = getxattr(xattr_file, attr, scratch, sizeof (scratch));
> 	check_error(rc, "getxattr");
> 	rc = lgetxattr(xattr_file, attr, scratch, sizeof (scratch));
> 	check_error(rc, "lgetxattr");
>
> 	/* Five entries, all with permission 06; only ACL_USER has an id. */
> 	acl.a_version = POSIX_ACL_XATTR_VERSION;
> 	for (slot = 0; slot < 5; slot++) {
> 		acl.a_entries[slot].e_tag = tags[slot];
> 		acl.a_entries[slot].e_perm = 06;
> 		acl.a_entries[slot].e_id = (slot == 1) ? 10 : -1;
> 	}
>
> 	rc = setxattr(xattr_file, attr, &acl, sizeof (acl), 0);
> 	check_error(rc, "setxattr");
>
> 	do_stats(xattr_file);
>
> 	rc = lsetxattr(xattr_file, attr, &acl, sizeof (acl), 0);
> 	check_error(rc, "lsetxattr");
>
> 	do_stats(xattr_file);
>
> 	rc = getxattr(xattr_file, attr, scratch, sizeof (scratch));
> 	check_error(rc, "getxattr");
> 	rc = lgetxattr(xattr_file, attr, scratch, sizeof (scratch));
> 	check_error(rc, "lgetxattr");
>
> 	rc = listxattr(xattr_file, scratch, sizeof (scratch));
> 	check_error(rc, "listxattr");
> 	rc = llistxattr(xattr_file, scratch, sizeof (scratch));
> 	check_error(rc, "llistxattr");
>
> 	rc = removexattr(xattr_file, attr);
> 	check_error(rc, "removexattr");
>
> 	do_stats(xattr_file);
>
> 	rc = setxattr(xattr_file, attr, &acl, sizeof (acl), 0);
> 	check_error(rc, "setxattr");
>
> 	do_stats(xattr_file);
>
> 	rc = lremovexattr(xattr_file, attr);
> 	check_error(rc, "lremovexattr");
>
> 	do_stats(xattr_file);
>
> 	rc = unlink(xattr_file);
> 	check_error(rc, "unlink");
>
> 	rc = rmdir("xattr");
> 	check_error(rc, "rmdir");
> }
>
> char *inotify_file = "inotify/a";
>
> /*
>  * Exercise inotify: create "inotify/a", open an inotify instance,
>  * add a watch for IN_ALL_EVENTS on the file and remove it again,
>  * stat'ing the file between steps.  Then close the instance, unlink
>  * the file and remove the directory.  Every result is passed to
>  * check_error().
>  */
> void
> inotify_test()
> {
> 	int error;
> 	int fd;
> 	int wd;
>
> 	error = mkdir("inotify", 0755);
> 	check_error(error, "mkdir");
>
> 	fd = open(inotify_file, O_CREAT | O_RDWR, 0644);
> 	check_error(fd, "open: O_CREAT");
>
> 	(void) close(fd);
>
> 	do_stats(inotify_file);
>
> 	fd = inotify_init();
> 	/* Check the descriptor just returned, not the stale mkdir() result. */
> 	check_error(fd, "inotify_init");
>
> 	do_stats(inotify_file);
>
> 	wd = inotify_add_watch(fd, inotify_file, IN_ALL_EVENTS);
> 	check_error(wd, "inotify_add_watch");
>
> 	do_stats(inotify_file);
>
> 	error = inotify_rm_watch(fd, wd);
> 	check_error(error, "inotify_rm_watch");
>
> 	(void) close(fd);
>
> 	do_stats(inotify_file);
>
> 	error = unlink(inotify_file);
> 	check_error(error, "unlink");
>
> 	error = rmdir("inotify");
> 	check_error(error, "rmdir");
> }

> #include <stdlib.h>
>
> /*
>  * Trivial program that exits successfully right away.
>  * NOTE(review): presumably meant to be built as "exec_test", the
>  * file that the stress program's exec_test() links and executes --
>  * confirm.
>  */
> int
> main(void)
> {
> 	exit(0);
> }

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/