[RFC/PATCH] FUSYN 2/10: modifications to the core

From: inaky . perez-gonzalez
Date: Wed Jan 14 2004 - 17:56:27 EST


These are modifications needed to support fuqueues and
fulocks. They are, in a nutshell:

- Move some stuff from kernel/futex.c to futex.h to make it
usable by more people.

- Add necessary fields to the task_struct.

- Hookup to initialization and cleanup points for process
creation and destruction.

- Hookup into the signalling and time out code for wait
cancellation, as well as for changing task priorities.

- Add error codes for dead-owner and not-recoverable.


include/asm-generic/errno-base.h | 4 +++
include/linux/futex.h | 52 +++++++++++++++++++++++++++++++++++++++
include/linux/init_task.h | 5 +++
include/linux/sched.h | 9 ++++++
kernel/Makefile | 3 +-
kernel/exit.c | 3 ++
kernel/fork.c | 2 +
kernel/futex.c | 39 +++--------------------------
kernel/sched.c | 52 ++++++++++++++++++++++++++++++++++++++-
kernel/signal.c | 16 +++++++++++-
kernel/timer.c | 11 +++++++-
11 files changed, 157 insertions(+), 39 deletions(-)

--- linux/include/linux/futex.h:1.1.1.1 Thu Jul 10 12:27:31 2003
+++ linux/include/linux/futex.h Sun Nov 16 07:21:32 2003
@@ -9,7 +9,11 @@
#define FUTEX_FD (2)
#define FUTEX_REQUEUE (3)

+#ifdef __KERNEL__

+#include <linux/jhash.h>
+
+struct timespec;
asmlinkage long sys_futex(u32 __user *uaddr, int op, int val,
struct timespec __user *utime, u32 __user *uaddr2);

@@ -17,4 +21,52 @@
long do_futex(unsigned long uaddr, int op, int val,
unsigned long timeout, unsigned long uaddr2, int val2);

+/*
+ * Futexes are matched on equal values of this key.
+ * The key type depends on whether it's a shared or private mapping.
+ * Don't rearrange members without looking at futex_hash_key().
+ *
+ * offset is aligned to a multiple of sizeof(u32) (== 4) by definition.
+ * We set bit 0 to indicate if it's an inode-based key.
+ */
+union futex_key {
+ struct { /* inode-based view: carries an inode pointer */
+ unsigned long pgoff;
+ struct inode *inode;
+ int offset; /* bit 0 flags an inode-based key (see comment above) */
+ } shared;
+ struct { /* mm-based view: carries an mm_struct pointer */
+ unsigned long uaddr;
+ struct mm_struct *mm;
+ int offset;
+ } private;
+ struct { /* type-agnostic view used by the hash and match helpers */
+ unsigned long word;
+ void *ptr;
+ int offset;
+ } both;
+};
+
+static inline
+u32 futex_hash_key (const union futex_key *key)
+{
+ /* Hash the leading word+ptr pair in u32 units, seeded with the offset. */
+ const u32 nwords = (sizeof(key->both.word) + sizeof(key->both.ptr)) / 4;
+
+ return jhash2((u32*)&key->both.word, nwords, key->both.offset);
+}
+
+static inline
+int match_futex_key (const union futex_key *key1, const union futex_key *key2)
+{
+ if (key1->both.word != key2->both.word || key1->both.ptr != key2->both.ptr)
+ return 0; /* word/ptr pair differs: keys cannot match */
+ return key1->both.offset == key2->both.offset;
+}
+
+extern int get_futex_key (unsigned long uaddr, union futex_key *key);
+extern void get_key_refs(union futex_key *key);
+extern void drop_key_refs(union futex_key *key);
+
+#endif /* #ifdef __KERNEL__ */
#endif
--- linux/include/linux/init_task.h:1.1.1.2 Thu Aug 28 12:38:00 2003
+++ linux/include/linux/init_task.h Sun Nov 16 07:21:32 2003
@@ -108,6 +108,11 @@
.proc_lock = SPIN_LOCK_UNLOCKED, \
.switch_lock = SPIN_LOCK_UNLOCKED, \
.journal_info = NULL, \
+ .fuqueue_wait_lock = SPIN_LOCK_UNLOCKED, \
+ .fuqueue_wait = NULL, \
+ .fuqueue_waiter = NULL, \
+ .fulock_olist = olist_INIT(&tsk.fulock_olist), \
+ .fulock_olist_lock = SPIN_LOCK_UNLOCKED, \
}


--- linux/include/linux/sched.h:1.1.1.11 Tue Jan 13 21:13:50 2004
+++ linux/include/linux/sched.h Wed Jan 14 11:13:30 2004
@@ -29,6 +29,7 @@
#include <linux/completion.h>
#include <linux/pid.h>
#include <linux/percpu.h>
+#include <linux/fulock-olist.h> /* olist_t */

struct exec_domain;

@@ -326,7 +327,8 @@
struct sigqueue *sigq; /* signal queue entry. */
};

-
+struct fuqueue;
+struct fuqueue_waiter;
struct io_context; /* See blkdev.h */
void exit_io_context(void);

@@ -464,6 +466,11 @@

unsigned long ptrace_message;
siginfo_t *last_siginfo; /* For ptrace use. */
+ struct fuqueue *fuqueue_wait; /* waiting for this queue */
+ struct fuqueue_waiter *fuqueue_waiter; /* waiting for this queue */
+ spinlock_t fuqueue_wait_lock; /* FIXME: locking too heavy -- better solution? */
+ olist_t fulock_olist; /* Fulock ownership list */
+ spinlock_t fulock_olist_lock;
};

static inline pid_t process_group(struct task_struct *tsk)
--- linux/kernel/Makefile:1.1.1.7 Tue Jan 13 21:13:51 2004
+++ linux/kernel/Makefile Wed Jan 14 11:13:33 2004
@@ -7,7 +7,8 @@
sysctl.o capability.o ptrace.o timer.o user.o \
signal.o sys.o kmod.o workqueue.o pid.o \
rcupdate.o intermodule.o extable.o params.o posix-timers.o \
- kthread.o
+ kthread.o \
+ fuqueue.o fulock.o fulock-pp.o vlocator.o ufuqueue.o ufulock.o

obj-$(CONFIG_FUTEX) += futex.o
obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
--- linux/kernel/exit.c:1.1.1.8 Tue Jan 13 20:54:51 2004
+++ linux/kernel/exit.c Wed Jan 14 11:13:33 2004
@@ -740,6 +740,8 @@

}

+extern void exit_fulocks(struct task_struct *);
+
NORET_TYPE void do_exit(long code)
{
struct task_struct *tsk = current;
@@ -768,6 +770,7 @@
}

acct_process(code);
+ exit_fulocks(tsk);
__exit_mm(tsk);

exit_sem(tsk);
--- linux/kernel/fork.c:1.1.1.11 Tue Jan 13 21:13:51 2004
+++ linux/kernel/fork.c Wed Jan 14 11:13:33 2004
@@ -40,6 +40,7 @@

extern int copy_semundo(unsigned long clone_flags, struct task_struct *tsk);
extern void exit_sem(struct task_struct *tsk);
+extern void init_fulock (struct task_struct *task);

/* The idle threads do not count..
* Protected by write_lock_irq(&tasklist_lock)
@@ -968,6 +969,7 @@
goto bad_fork_cleanup_signal;
if ((retval = copy_namespace(clone_flags, p)))
goto bad_fork_cleanup_mm;
+ init_fulock(p);
retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
if (retval)
goto bad_fork_cleanup_namespace;
--- linux/kernel/futex.c:1.1.1.6 Tue Jan 13 21:13:51 2004
+++ linux/kernel/futex.c Wed Jan 14 11:13:33 2004
@@ -41,31 +41,6 @@

#define FUTEX_HASHBITS 8

-/*
- * Futexes are matched on equal values of this key.
- * The key type depends on whether it's a shared or private mapping.
- * Don't rearrange members without looking at hash_futex().
- *
- * offset is aligned to a multiple of sizeof(u32) (== 4) by definition.
- * We set bit 0 to indicate if it's an inode-based key.
- */
-union futex_key {
- struct {
- unsigned long pgoff;
- struct inode *inode;
- int offset;
- } shared;
- struct {
- unsigned long uaddr;
- struct mm_struct *mm;
- int offset;
- } private;
- struct {
- unsigned long word;
- void *ptr;
- int offset;
- } both;
-};

/*
* We use this hashed waitqueue instead of a normal wait_queue_t, so
@@ -109,9 +84,7 @@
*/
static struct futex_hash_bucket *hash_futex(union futex_key *key)
{
- u32 hash = jhash2((u32*)&key->both.word,
- (sizeof(key->both.word)+sizeof(key->both.ptr))/4,
- key->both.offset);
+ u32 hash = futex_hash_key (key);
return &futex_queues[hash & ((1 << FUTEX_HASHBITS)-1)];
}

@@ -120,9 +93,7 @@
*/
static inline int match_futex(union futex_key *key1, union futex_key *key2)
{
- return (key1->both.word == key2->both.word
- && key1->both.ptr == key2->both.ptr
- && key1->both.offset == key2->both.offset);
+ return match_futex_key (key1, key2);
}

/*
@@ -137,7 +108,7 @@
*
* Should be called with &current->mm->mmap_sem but NOT any spinlocks.
*/
-static int get_futex_key(unsigned long uaddr, union futex_key *key)
+int get_futex_key(unsigned long uaddr, union futex_key *key)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
@@ -232,7 +203,7 @@
* NOTE: mmap_sem MUST be held between get_futex_key() and calling this
* function, if it is called at all. mmap_sem keeps key->shared.inode valid.
*/
-static inline void get_key_refs(union futex_key *key)
+inline void get_key_refs(union futex_key *key)
{
if (key->both.ptr != 0) {
if (key->both.offset & 1)
@@ -246,7 +217,7 @@
* Drop a reference to the resource addressed by a key.
* The hash bucket spinlock must not be held.
*/
-static void drop_key_refs(union futex_key *key)
+inline void drop_key_refs(union futex_key *key)
{
if (key->both.ptr != 0) {
if (key->both.offset & 1)
--- linux/kernel/sched.c:1.1.1.15 Tue Jan 13 21:13:51 2004
+++ linux/kernel/sched.c Wed Jan 14 11:13:33 2004
@@ -658,7 +658,8 @@
int success = 0;
long old_state;
runqueue_t *rq;
-
+
+
repeat_lock_task:
rq = task_rq_lock(p, &flags);
old_state = p->state;
@@ -1761,6 +1762,8 @@

EXPORT_SYMBOL(default_wake_function);

+extern void fuqueue_wait_cancel(struct task_struct *, int);
+
/*
* The core wakeup function. Non-exclusive wakeups (nr_exclusive == 0) just
* wake everything up. If it's an exclusive wakeup (nr_exclusive == small +ve
@@ -1769,6 +1772,9 @@
* There are circumstances in which we can try to wake a task which has already
* started to run but is not in state TASK_RUNNING. try_to_wake_up() returns
* zero in this (rare) case, and we handle it by continuing to scan the queue.
+ *
+ * fuqueue_wait_cancel needs to hook up here to properly reschedule
+ * priority inheritance/protected tasks. Check its doc to learn why.
*/
static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
int nr_exclusive, int sync)
@@ -1780,6 +1786,8 @@
unsigned flags;
curr = list_entry(tmp, wait_queue_t, task_list);
flags = curr->flags;
+ if (curr->task != NULL && unlikely(curr->task->fuqueue_wait != NULL)) /* NOTE(review): func-only entries may carry no task -- confirm */
+ fuqueue_wait_cancel(curr->task, -EINTR);
if (curr->func(curr, mode, sync) &&
(flags & WQ_FLAG_EXCLUSIVE) &&
!--nr_exclusive)
@@ -2106,6 +2114,46 @@
return pid ? find_task_by_pid(pid) : current;
}

+/**
+ * Unconditionally set the effective priority of a task.
+ *
+ * Not too useful on SCHED_OTHER tasks, btw ...
+ *
+ * @p Pointer to the task in question
+ * @prio New priority to set
+ */
+#warning FIXME: need to play by POSIX rules on prio change and list repositioning because of prio inheritance
+
+void __set_prio (struct task_struct *p, int prio)
+{
+ runqueue_t *rq;
+ prio_array_t *array;
+ unsigned long flags; /* passed by address to task_rq_lock()/task_rq_unlock() */
+ int oldprio;
+
+ rq = task_rq_lock(p, &flags);
+ oldprio = p->prio;
+ array = p->array;
+ if (!array) { /* task is on no priority array: just record the value */
+ p->prio = prio;
+ goto out_unlock;
+ }
+ deactivate_task(p, task_rq(p)); /* take it off, change prio, put it back */
+ p->prio = prio;
+ __activate_task(p, task_rq(p));
+ if (rq->curr == p) {
+ if (p->prio > oldprio) /* running task's prio value grew: ask for a resched */
+ resched_task(rq->curr);
+ }
+ else if (TASK_PREEMPTS_CURR (p, rq))
+ resched_task(rq->curr);
+out_unlock:
+ task_rq_unlock (rq, &flags);
+}
+
+
+extern void fuqueue_chprio (struct task_struct *);
+
/*
* setscheduler - change the scheduling policy and/or RT priority of a thread.
*/
@@ -2185,6 +2233,8 @@
p->prio = MAX_USER_RT_PRIO-1 - p->rt_priority;
else
p->prio = p->static_prio;
+ if (p->fuqueue_wait != NULL)
+ fuqueue_chprio(p);
if (array) {
__activate_task(p, task_rq(p));
/*
--- linux/kernel/signal.c:1.1.1.9 Tue Jan 13 20:54:51 2004
+++ linux/kernel/signal.c Wed Jan 14 11:13:33 2004
@@ -524,6 +524,8 @@
return signr;
}

+extern void fuqueue_wait_cancel (struct task_struct *, int);
+
/*
* Tell a process that it has a new active signal..
*
@@ -547,12 +549,21 @@
* executing another processor and just now entering stopped state.
* By calling wake_up_process any time resume is set, we ensure
* the process will wake up and handle its stop or death signal.
+ *
+ * fuqueue_wait_cancel needs to hook up here to properly reschedule
+ * priority inheritance/protected tasks. The reason is that
+ * when we resched a process that has boosted another one, we
+ * need to kick its butt off the CPU (and lower its priority) ASAP
+ * so that 't' can run.
*/
mask = TASK_INTERRUPTIBLE;
if (resume)
mask |= TASK_STOPPED;
- if (!wake_up_state(t, mask))
+ if (!wake_up_state(t, mask)) {
+ if (unlikely (t->fuqueue_wait != NULL))
+ fuqueue_wait_cancel(t, -EINTR);
kick_process(t);
+ }
}

/*
@@ -675,6 +686,9 @@
set_tsk_thread_flag(t, TIF_SIGPENDING);
state |= TASK_INTERRUPTIBLE;
}
+ /* FIXME: I am not that sure we need to cancel here */
+ if (unlikely(t->fuqueue_wait != NULL))
+ fuqueue_wait_cancel(t, -EINTR);
wake_up_state(t, state);

t = next_thread(t);
--- linux/kernel/timer.c:1.1.1.10 Tue Jan 13 21:13:51 2004
+++ linux/kernel/timer.c Wed Jan 14 11:13:34 2004
@@ -966,9 +966,18 @@

#endif

+/*
+ * fuqueue_wait_cancel needs to hook up here to properly reschedule
+ * priority inheritance/protected tasks. Check its doc to learn why.
+ */
+extern void fuqueue_wait_cancel(struct task_struct *, int);
+
static void process_timeout(unsigned long __data)
{
- wake_up_process((task_t *)__data);
+ struct task_struct *task = (task_t *) __data;
+ if (unlikely(task->fuqueue_wait != NULL))
+ fuqueue_wait_cancel(task, -ETIMEDOUT);
+ wake_up_process(task);
}

/**
--- linux/include/asm-generic/errno-base.h:1.1.1.1 Thu Jul 10 12:27:27 2003
+++ linux/include/asm-generic/errno-base.h Sun Nov 16 07:21:32 2003
@@ -36,4 +36,8 @@
#define EDOM 33 /* Math argument out of domain of func */
#define ERANGE 34 /* Math result not representable */

+ /* FIXME: ugly hack to avoid conflicts -- need to get better numbers */
+#define EOWNERDEAD 525 /* Mutex owner died */
+#define ENOTRECOVERABLE 526 /* Mutex state is not recoverable */
+
#endif
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/