HTTPFS in about 200 lines of C

baccala@FreeSoft.org
Sun, 5 Apr 1998 01:22:49 -0500 (EST)


--477024386-269167349-891757377=:12572
Content-Type: TEXT/plain; CHARSET=US-ASCII

Just a quick cross-posting to share a 4-hour hack with everyone...

For httpfs readers: This is a minimal, read-only implementation of an
"HTTPFS" that works by overloading the open() and stat() calls (and
their variants) in libc. Ordinary programs can now "open" URLs as if
they were files. It's all user-space, too.

For libc-hackers: Just another little application for LD_PRELOAD. In
particular, I'm trying to figure out if there's a way to achieve
closure on the filesystem operations. I had to overload open() to make
"more" happy, and __open() to make "less" happy - neither one alone
seemed to satisfy both programs. Is there (or can we make) a list of
all libc functions that need to be overloaded in order to intercept all
filesystem access?

For linux-kernel readers, particularly those on the thread about gzip
and tar fs's. Take a look at this code. It's not exactly what you
want, but maybe something along these lines would suffice for your
purposes - and without any kernel hacking.

PREREQUISITES:

- A fairly recent ld.so and libdl. I've had good results with
glibc/ld.so/libdl 2.0.6 on a RedHat 5.0 i386 box. Core dumps on
glibc 1; ld.so/libdl 1.8.0 on RedHat 4.2 sparc. Will investigate
more next week.

- Perl 5, with the LWP package, providing the "lwp-request" script.
All the HTTP ops are performed by spawning out to this script.

COMPILATION:

- gcc -nostartfiles -fpic -shared -o httpfs.so httpfs.c -ldl

- Then (I do this in a separate window):
export LD_PRELOAD=./httpfs.so

NOW:

- cat http://www.freesoft.org/index.html

- cp http://www.freesoft.org/index.html .

- Or even: !!?!

ls -l http://www.freesoft.org/index.html

WHAT WORKS:

cat, less, vi, grep, sed, cp, ls, stat

WHAT DOESN'T:

"more" seems to read the beginning of the file twice. Of course,
the rewind with lseek() doesn't work on the pipe, so you end up
losing the beginning of the file (according to strace)

"emacs" doesn't like the URL; thinks it's a relative path and tries
to construct various absolute paths to get to the file; finally
gives up without trying the name it was given (according to strace)

Any directory operations

Anything but read-only access. No PUT support yet.

DRAWBACKS

Persistent connections appear difficult if not impossible. Perhaps
via a local cache?

Anything that doesn't go through libc loses - but what would that be?

FUTURE DIRECTIONS

I'd like to dump the Perl dependency. Any suggestions for a good C
library to perform URL operations?

Directories. Certificate and user/password authentication. Write
access.

Others as based on your feedback

Source code is also available at:

ftp://ftp.freesoft.org/pub/httpfs.c

-- 
					-bwb

Brent Baccala baccala@FreeSoft.org

--477024386-269167349-891757377=:12572 Content-Type: TEXT/plain; CHARSET=US-ASCII Content-Description: httpfs.c

#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <string.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <dlfcn.h>
#include <fcntl.h>
#include <regex.h>
#include <unistd.h>

/*
 * Compile with:
 *
 *     gcc -nostartfiles -fpic -shared -o httpfs.so httpfs.c -ldl
 */

/* #define DEBUG 1 */

/*
 * Pointers to the next definitions of the libc entry points this library
 * overrides.  They are filled in by _init() via dlsym(RTLD_NEXT, ...).
 *
 * Everything here is file-local (static): this object is meant to be
 * LD_PRELOADed, and exporting generic names such as "buffer" would put
 * them in the host program's global symbol namespace.
 */
static int (*next__open)(const char *, int, mode_t);
static int (*nextopen)(const char *, int, mode_t);
static int (*nextstat)(int ver, const char *, struct stat *);
static int (*next__lxstat)(int ver, const char *, struct stat *);

/* Compiled in _init(); a pathname that matches is treated as a URL. */
static regex_t url_regex;

/* Scratch space for building commands and reading their output. */
static char buffer[1024];
/* Holds the value of a Last-Modified header. */
static char date_buffer[1024];

void _init(void) { int retval; const char *errval;

#ifdef DEBUG __write(1,"GotInit\n",8); #endif

next__open = dlsym(RTLD_NEXT,"__open"); if ((errval = dlerror()) != NULL) { fprintf(stderr, "dlsym(__open): %s\n", errval); }

nextopen = dlsym(RTLD_NEXT,"open"); if ((errval = dlerror()) != NULL) { fprintf(stderr, "dlsym(open): %s\n", errval); }

nextstat = dlsym(RTLD_NEXT,"__xstat"); if ((errval = dlerror()) != NULL) { fprintf(stderr, "dlsym(__xstat): %s\n", errval); }

next__lxstat = dlsym(RTLD_NEXT,"__lxstat"); if ((errval = dlerror()) != NULL) { fprintf(stderr, "dlsym(__lxstat): %s\n", errval); }

retval = regcomp(&url_regex, "^[a-zA-Z0-9+.-]*:", 0); if (retval != 0) { fprintf(stderr, "Regular expression compilation failed in httpfs: return value %d\n", retval); }

#ifdef DEBUG __write(1,"EndInit\n",8); #endif }

/*
 * openhandler - open a URL for reading through the external "lwp-request"
 * program.
 *
 * First runs "lwp-request -m HEAD '<pathname>'" and scans its output for a
 * line that starts with an integer status; anything other than 200 is
 * reported as ENOENT.  On success a child process is forked that execs
 * "lwp-request <pathname>" with its standard output connected to a pipe.
 *
 * pathname: the URL, passed to lwp-request unmodified.
 *
 * Returns the read end of the pipe, or -1 on failure.  errno is ENOENT
 * when the status is not 200 or when pathname contains a single quote
 * (it cannot be placed inside the single-quoted shell word), and
 * ENAMETOOLONG when the HEAD command does not fit the command buffer.
 * The forked child is not waited for here.
 */
static int openhandler(const char *pathname)
{
    char line[1024];    /* local scratch: keeps the function reentrant */
    FILE *head;
    int status = 0;
    int len;
    int saved_errno;
    int fds[2];
    pid_t child;

    /*
     * The HEAD request goes through the shell (popen), so the URL is
     * wrapped in single quotes.  Inside single quotes only a single quote
     * itself is special to the shell; refuse such names instead of letting
     * them terminate the quoting.
     */
    if (strchr(pathname, '\'') != NULL) {
        errno = ENOENT;
        return -1;
    }

    len = snprintf(line, sizeof(line), "lwp-request -m HEAD '%s'", pathname);
    if (len < 0 || (size_t)len >= sizeof(line)) {
        /* A truncated command would query a different URL. */
        errno = ENAMETOOLONG;
        return -1;
    }

    head = popen(line, "r");
    if (head == NULL) {
        saved_errno = errno;
        fprintf(stderr, "Can't popen for header in httpfs\n");
        errno = saved_errno;
        return -1;
    }

    /* Every line that begins with an integer updates the status. */
    while (fgets(line, sizeof(line), head) != NULL) {
        sscanf(line, "%d ", &status);
    }
    pclose(head);   /* releases the stream and reaps the HEAD child */

    if (status != 200) {
        errno = ENOENT;
        return -1;
    }

    if (pipe(fds) == -1) {
        saved_errno = errno;
        fprintf(stderr, "Pipe failed in httpfs\n");
        errno = saved_errno;
        return -1;
    }

    child = fork();
    if (child == -1) {
        saved_errno = errno;
        fprintf(stderr, "Fork failed in httpfs\n");
        close(fds[0]);
        close(fds[1]);
        errno = saved_errno;
        return -1;
    }

    if (child == 0) {
        /* Child: stdin closed, stdout becomes the pipe's write end. */
        close(0);
        close(fds[0]);
        if (fds[1] != 1) {
            dup2(fds[1], 1);
            close(fds[1]);
        }

        execlp("lwp-request", "lwp-request", pathname, (char *)NULL);
        fprintf(stderr, "Exec failed in httpfs\n");
        /* _exit: do not run the host program's atexit handlers or flush
         * its stdio buffers from the forked copy. */
        _exit(127);
    }

    /* Parent: keep only the read end. */
    close(fds[1]);
    return fds[0];
}

/*
 * open - replacement for libc's open().
 *
 * A pathname that matches url_regex is treated as a URL: only read-only
 * access is allowed (anything else fails with EACCES) and the descriptor
 * comes from openhandler().  Every other pathname is forwarded to the next
 * open() found by _init().
 *
 * The optional third argument (mode) is forwarded when the flags require
 * it; passing a constant 0 instead would create files with no permission
 * bits when O_CREAT is used.
 *
 * Returns a file descriptor, or -1 with errno set.  ENOSYS is reported if
 * the next open() could not be resolved.
 */
int open(const char *pathname, int flags, ...)
{
    mode_t mode = 0;
    int needs_mode;
    va_list ap;

    if (regexec(&url_regex, pathname, 0, NULL, 0) == 0) {
        if ((flags & O_ACCMODE) != O_RDONLY) {
            errno = EACCES;
            return -1;
        }
        return openhandler(pathname);
    }

    if (nextopen == NULL) {
        /* dlsym() failed in _init(); do not call through a null pointer. */
        errno = ENOSYS;
        return -1;
    }

    needs_mode = (flags & O_CREAT) != 0;
#ifdef O_TMPFILE
    needs_mode = needs_mode || (flags & O_TMPFILE) == O_TMPFILE;
#endif
    if (needs_mode) {
        va_start(ap, flags);
        /* Read as int: a narrower mode_t would have been promoted. */
        mode = (mode_t)va_arg(ap, int);
        va_end(ap);
    }

    return (*nextopen)(pathname, flags, mode);
}

/*
 * __open - replacement for libc's __open(), mirroring open().
 *
 * A pathname that matches url_regex is treated as a URL: only read-only
 * access is allowed (anything else fails with EACCES) and the descriptor
 * comes from openhandler().  Every other pathname is forwarded to the next
 * __open() found by _init().
 *
 * The optional third argument (mode) is forwarded when the flags require
 * it; passing a constant 0 instead would create files with no permission
 * bits when O_CREAT is used.
 *
 * Returns a file descriptor, or -1 with errno set.  ENOSYS is reported if
 * the next __open() could not be resolved.
 */
int __open(const char *pathname, int flags, ...)
{
    mode_t mode = 0;
    int needs_mode;
    va_list ap;

    if (regexec(&url_regex, pathname, 0, NULL, 0) == 0) {
        if ((flags & O_ACCMODE) != O_RDONLY) {
            errno = EACCES;
            return -1;
        }
        return openhandler(pathname);
    }

    if (next__open == NULL) {
        /* dlsym() failed in _init(); do not call through a null pointer. */
        errno = ENOSYS;
        return -1;
    }

    needs_mode = (flags & O_CREAT) != 0;
#ifdef O_TMPFILE
    needs_mode = needs_mode || (flags & O_TMPFILE) == O_TMPFILE;
#endif
    if (needs_mode) {
        va_start(ap, flags);
        /* Read as int: a narrower mode_t would have been promoted. */
        mode = (mode_t)va_arg(ap, int);
        va_end(ap);
    }

    return (*next__open)(pathname, flags, mode);
}

/*
 * stathandler - fill in a struct stat for a URL.
 *
 * Runs "lwp-request -m HEAD '<pathname>'" and scans its output for a line
 * starting with an integer status, a "Content-Length:" line and a
 * "Last-Modified:" line.  The Last-Modified text, if any, is converted to
 * seconds with "date +%s -d '<text>'".
 *
 * ver:      accepted for signature compatibility with __xstat/__lxstat;
 *           not used.
 * pathname: the URL.
 * buf:      result; zeroed, then reported as a regular file with mode
 *           0444, one link, st_size from Content-Length and all three
 *           timestamps from Last-Modified (0 if absent or unparsable).
 *
 * Returns 0 on success, -1 on failure.  errno is ENOENT when the status
 * is not 200 or when pathname contains a single quote (it cannot be placed
 * inside the single-quoted shell word), and ENAMETOOLONG when the HEAD
 * command does not fit the command buffer.
 */
static int stathandler(int ver, const char *pathname, struct stat *buf)
{
    char line[1024];        /* local scratch: keeps the function reentrant */
    char modified[1024];
    FILE *head;
    FILE *conv;
    int status = 0;
    int have_date = 0;
    int len;
    int saved_errno;
    long length = 0;
    long when = 0;

    (void)ver;

    /*
     * Commands run through the shell (popen), so untrusted text is wrapped
     * in single quotes.  Inside single quotes only a single quote itself is
     * special to the shell; text containing one is refused.
     */
    if (strchr(pathname, '\'') != NULL) {
        errno = ENOENT;
        return -1;
    }

    len = snprintf(line, sizeof(line), "lwp-request -m HEAD '%s'", pathname);
    if (len < 0 || (size_t)len >= sizeof(line)) {
        /* A truncated command would query a different URL. */
        errno = ENAMETOOLONG;
        return -1;
    }

    head = popen(line, "r");
    if (head == NULL) {
        saved_errno = errno;
        fprintf(stderr, "Can't popen for header in httpfs\n");
        errno = saved_errno;
        return -1;
    }

    while (fgets(line, sizeof(line), head) != NULL) {
        if (sscanf(line, "%d ", &status) == 1) {
            /* status line */
        } else if (sscanf(line, "Content-Length: %ld\n", &length) == 1) {
            /* size */
        } else if (sscanf(line, "Last-Modified: %1023[^\n]", modified) == 1) {
            have_date = 1;
        }
    }
    pclose(head);   /* releases the stream and reaps the HEAD child */

    if (status != 200) {
        errno = ENOENT;
        return -1;
    }

    if (length < 0) {
        length = 0;
    }

    /* The date text comes from the remote server: never let it close the
     * quoting of the shell command. */
    if (have_date && strchr(modified, '\'') == NULL) {
        len = snprintf(line, sizeof(line), "date +%%s -d '%s'", modified);
        if (len >= 0 && (size_t)len < sizeof(line)) {
            conv = popen(line, "r");
            if (conv == NULL) {
                fprintf(stderr, "Can't popen for date in httpfs\n");
            } else {
                if (fscanf(conv, "%ld", &when) != 1) {
                    when = 0;
                }
                pclose(conv);   /* popen streams are closed with pclose */
            }
        }
    }

    /* Zero everything first so no field is left uninitialised. */
    memset(buf, 0, sizeof(*buf));
    buf->st_mode = S_IFREG | 0444;
    buf->st_nlink = 1;
    buf->st_size = length;
    buf->st_atime = when;
    buf->st_mtime = when;
    buf->st_ctime = when;

    return 0;
}

/*
 * __xstat - replacement for libc's __xstat().
 *
 * Pathnames that do not match url_regex are passed straight to the
 * function nextstat points at; matching ones are answered by
 * stathandler().  The result of whichever is called is returned as is.
 */
int __xstat(int ver, const char *pathname, struct stat *buf)
{
    if (regexec(&url_regex, pathname, 0, NULL, 0) != 0) {
        return nextstat(ver, pathname, buf);
    }

    return stathandler(ver, pathname, buf);
}

/*
 * __lxstat - replacement for libc's __lxstat().
 *
 * Pathnames that do not match url_regex are passed straight to the
 * function next__lxstat points at; matching ones are answered by
 * stathandler().  The result of whichever is called is returned as is.
 */
int __lxstat(int ver, const char *pathname, struct stat *buf)
{
    if (regexec(&url_regex, pathname, 0, NULL, 0) != 0) {
        return next__lxstat(ver, pathname, buf);
    }

    return stathandler(ver, pathname, buf);
}

--477024386-269167349-891757377=:12572--

- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.rutgers.edu