Re: export of sys_call_table

From: Brian F. G. Bidulock (bidulock@openss7.org)
Date: Fri Oct 04 2002 - 06:19:32 EST


Arjan,

On Fri, 04 Oct 2002, Arjan van de Ven wrote:

> On Fri, 2002-10-04 at 01:06, Brian F. G. Bidulock wrote:
> > Alan,
> >
> > Would it be possible to put a secondary call table behind
> > the call gate wrappered in sys_ni_syscall that a module
> > could register against.
> Why ?
> Adding "unknown" syscalls is I doubt EVER a good idea.
> LiS has *known* and *official* syscalls, they can easily live with a
> stub like nfsd uses.... few lines of code and it's safe.

Well, nfsd does something like this:

        struct nfsd_linkage *nfsd_linkage = NULL;

        long
        asmlinkage sys_nfsservctl(int cmd, void *argp, void *resp)
        {
                int ret = -ENOSYS;
                
                lock_kernel();

                if (nfsd_linkage ||
                    (request_module ("nfsd") == 0 && nfsd_linkage))
                        ret = nfsd_linkage->do_nfsservctl(cmd, argp, resp);

                unlock_kernel();
                return ret;
        }
        EXPORT_SYMBOL(nfsd_linkage);

I take it that this system call is not in nfsd's main data flow
(probably write() and read() are). Taking the big kernel lock is
excessive across every putpmsg() and getpmsg() operation and would
seriously impact LiS performance on multiple processors. In effect,
only one processor would run for LiS. A reader/writer lock would be
better.

Also, LiS does not require module loading on system call, but
(questionably) needs unloading protection -- LiS does not really
need to unload once loaded. This turns into something more like:

        /*
         * Handlers for the STREAMS system calls. All start out NULL, in which
         * case the corresponding sys_* stub returns -ENOSYS. They are written
         * only by register_streams_calls().
         */
        static int (*do_putpmsg) (int, void *, void *, int, int) = NULL;
        static int (*do_getpmsg) (int, void *, void *, int, int) = NULL;
        static int (*do_spipe) (int *) = NULL;
        static int (*do_fattach) (int, const char *) = NULL;
        static int (*do_fdetach) (const char *) = NULL;

        /*
         * Guards the handler pointers above: the stubs take it for reading
         * around each call, registration takes it for writing.
         */
        static rwlock_t streams_call_lock = RW_LOCK_UNLOCKED;

        static long asmlinkage sys_putpmsg(int fd, void *ctlptr,
                                           void *dataptr, int band, int flags)
        {
                int ret = -ENOSYS;
                read_lock(&streams_call_lock);
                if (do_putpmsg)
                        ret = do_putpmsg(fd, ctrptr, dataptr, band, flags);
                read_unlock(&streams_call_lock);
                return ret;
        }

        static long asmlinkage sys_getpmsg(int fd, void *ctlptr,
                                           void *dataptr, int band, int flags)
        {
                int ret = -ENOSYS;
                read_lock(&streams_call_lock);
                if (do_getpmsg)
                        ret = do_getpmsg(fd, ctrptr, dataptr, band, flags);
                read_unlock(&streams_call_lock);
                return ret;
        }

        static long asmlinkage sys_spipe(int *fd)
        {
                int ret = -ENOSYS;
                read_lock(&streams_call_lock);
                if (do_spipe)
                        ret = do_spipe(fd);
                read_unlock(&streams_call_lock);
                return ret;
        }

        static long asmlinkage sys_fattach(int fd, const char *path)
        {
                int ret = -ENOSYS;
                read_lock(&streams_call_lock);
                if (do_fattach)
                        ret = do_fattach(fd, path);
                read_unlock(&streams_call_lock);
                return ret;
        }

        static long asmlinkage sys_fdetach(const char *path)
        {
                int ret = -ENOSYS;
                read_lock(&streams_call_lock);
                if (do_fdetach)
                        ret = do_fdetach(path);
                read_unlock(&streams_call_lock);
                return ret;
        }

        void register_streams_calls(int (*putpmsg) (int, void *, void *, int, int),
                                    int (*getpmsg) (int, void *, void *, int, int),
                                    int (*spipe) (int *),
                                    int (*fattach) (int, const char *),
                                    int (*fdetach) (const char *))
        {
                write_lock(&streams_call_lock);
                do_putpmsg = putpmsg;
                do_getpmsg = getpmsg;
                do_spipe = spipe;
                do_fattach = fattach;
                do_fdetach = fdetach;
                write_unlock(&streams_call_lock);
        }
        void unregister_streams_calls(void)
        {
                register_streams_calls(NULL, NULL, NULL, NULL, NULL);
        }

        EXPORT_SYMBOL(register_streams_calls);
        EXPORT_SYMBOL(unregister_streams_calls);

The module (LiS or iBCS) calls register_streams_calls after it loads and calls
unregister_streams_calls before it unloads.

But this is repetitive and doesn't solve replacement of existing
system calls for profilers and such. Consider instead a single
secondary call table approach such as:

        /*
         * One slot of the secondary call table: a replaceable handler pointer
         * guarded by its own reader/writer lock. The table has 256 slots,
         * indexed by system call number.
         *
         * NOTE(review): no initialiser is shown for the table; confirm that
         * every slot's lock is set to RW_LOCK_UNLOCKED before first use.
         */
        struct sys_secondary_call {
                rwlock_t lock;                  /* guards call */
                long asmlinkage(*call) (void);  /* current handler, NULL if none */
        } sys_secondary_call_table[256];

        void *replace_syscall(__u8 nr, void *newcall)
        {
                void *oldcall;
                write_lock(&sys_secondary_call_table[nr].lock);
                oldcall = xchg(&sys_secondary_call_table[nr].call, newcall);
                write_unlock(&sys_secondary_call_table[nr].lock);
                return (oldcall);
        }
        EXPORT_SYMBOL(replace_syscall);

        #define SYSCALL_STUB(num) \
        long asmlinkage sys_call_ # num (void) { \
                int ret = -ENOSYS; \
                read_lock(&sys_secondary_call_table[num].lock); \
                if (sys_secondary_call_table[num].call) { \
                        ret = (*sys_secondary_call_table.call) (); \
                read_unlock(&sys_secondary_call_table[num].lock); \
                return (ret); \
        }

        /* Define one stub function per system call number. */
        SYSCALL_STUB(__NR_setup);
        SYSCALL_STUB(__NR_exit);
        SYSCALL_STUB(__NR_fork);
                .
                .
                .
               etc.

With entry.S looking like:

        .data
        ENTRY(sys_call_table)
                .long SYMBOL_NAME(sys_call_0)
                .long SYMBOL_NAME(sys_call_1)
                                .
                                .
                                .
                .long SYMBOL_NAME(sys_call_255)

Then any module could either replace existing system calls or implement
otherwise unimplemented ones. It just seems that the general purpose
approach could work better for most things (even nfsd).

--brian

-- 
Brian F. G. Bidulock    ¦ The reasonable man adapts himself to the ¦
bidulock@openss7.org    ¦ world; the unreasonable one persists in  ¦
http://www.openss7.org/ ¦ trying  to adapt the  world  to himself. ¦
                        ¦ Therefore  all  progress  depends on the ¦
                        ¦ unreasonable man. -- George Bernard Shaw ¦


- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Mon Oct 07 2002 - 22:00:43 EST