Re: 2.1.131-ac3 3c509 no irq_save in the sender code

Andrea Arcangeli (andrea@e-mind.com)
Mon, 7 Dec 1998 17:10:23 +0100 (CET)


On Sat, 5 Dec 1998, Alan Cox wrote:

>> Are you sure you are not reinserting a bug? Using the irq_save spinlock
>> would avoid that an irq handler start playing with the NIC while it was
>> sending data (outsl(ioaddr + TX_FIFO, skb->data, (skb->len + 3) >>
>> 2);) also in UP. I think this is the reason of the TX timeout errors under
        ^^^^^^^^^^

>Don't reverse it. That trashes your interrupt latency. An irq_save spinlock
>isnt acceptable for an ISA bus block transfer, you might be 2mS with irqs
>off.

I agree, a better fix is preferable of course ;)

>If you look at the change, it does what Ingo suggested which is to disable
>the receive interrupt during a transmit (tx is single threaded implicitly),

I don't quite understand what `tx is single threaded implicitly' means...

>that way only the card suffers. This means the only irq locked off is the
>3c509 one.

It would be nice if that were also true on UP.

[snipped from 131-ac-4]

+#ifdef __SMP__
^^^^^^^^^^^^^^

+ disable_irq(dev->irq);
+ spin_lock(&lp->lock);
+#endif

/* Put out the doubleword header... */
outw(skb->len, ioaddr + TX_FIFO);

[snipped from 131-ac-4]

>You still need a spinlock because another CPU could begin executing a transmit
>during a receive. In which case we spin on the spinlock until the receive
>irq completes (in actual fact I believe the disable_irq handles that), and
>also because any CPU could execute a get_stats request.

I understand this very well (the non-irqsave spin_lock is needed to
synchronize with the irq handler ;).
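
To make the whole scheme explicit, here is a rough sketch of the other side
of the lock (not the actual 131-ac4 code; the private struct is trimmed to
the two fields that matter here). The interrupt handler and get_stats take
lp->lock too, so a transmit running on another CPU inside
disable_irq(dev->irq) + spin_lock(&lp->lock) is serialized against both:

#include <linux/netdevice.h>	/* struct device, struct net_device_stats */
#include <linux/interrupt.h>
#include <asm/spinlock.h>

struct el3_private {			/* trimmed to what the sketch uses */
	struct net_device_stats stats;
	spinlock_t lock;
};

static void el3_interrupt(int irq, void *dev_id, struct pt_regs *regs)
{
	struct device *dev = dev_id;
	struct el3_private *lp = (struct el3_private *)dev->priv;

	spin_lock(&lp->lock);	/* spins while another CPU is in the xmit path */
	/* ... drain the RX FIFO, ack TX/status events ... */
	spin_unlock(&lp->lock);
}

static struct net_device_stats *el3_get_stats(struct device *dev)
{
	struct el3_private *lp = (struct el3_private *)dev->priv;
	unsigned long flags;

	/* process context on any CPU: take the same lock, with local irqs
	 * off so the handler above can't interrupt us while we hold it */
	spin_lock_irqsave(&lp->lock, flags);
	/* ... update lp->stats from the card registers ... */
	spin_unlock_irqrestore(&lp->lock, flags);
	return &lp->stats;
}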

>So I believe the code to be correct.

It's correct on SMP, in fact. I probably didn't explain my point very
well in my first email. This fixes the UP problem I saw (and we won't
waste time in spin_lock() on UP, of course ;). (I didn't have time to think
about the best fix the other day :().
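
For reference, the unconditional spin_lock()/spin_unlock() really is free on
UP: on a uniprocessor build the lock type is an empty struct and the lock
operations compile away to nothing, roughly like the following sketch (a
paraphrase of the UP <asm/spinlock.h> definitions, not a verbatim copy), so
only the disable_irq()/enable_irq() pair is left in the UP build:

/* roughly what <asm/spinlock.h> provides when __SMP__ is not defined */
typedef struct { } spinlock_t;
#define SPIN_LOCK_UNLOCKED	{ }
#define spin_lock_init(lock)	do { } while (0)
#define spin_lock(lock)		(void)(lock)	/* avoids "unused variable" */
#define spin_unlock(lock)	do { } while (0)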

Index: linux/drivers/net/3c509.c
diff -u linux/drivers/net/3c509.c:1.1.3.1 linux/drivers/net/3c509.c:1.1.1.1.2.2
--- linux/drivers/net/3c509.c:1.1.3.1 Sat Dec 5 11:13:41 1998
+++ linux/drivers/net/3c509.c Mon Dec 7 17:01:57 1998
@@ -538,10 +538,8 @@
* time sensitive devices.
*/

-#ifdef __SMP__
disable_irq(dev->irq);
- spin_lock(&lp->lock);
-#endif
+ spin_lock(&lp->lock);

/* Put out the doubleword header... */
outw(skb->len, ioaddr + TX_FIFO);
@@ -559,10 +557,8 @@
} else
/* Interrupt us when the FIFO has room for max-sized packet. */
outw(SetTxThreshold + 1536, ioaddr + EL3_CMD);
-#ifdef __SMP__
spin_unlock(&lp->lock);
enable_irq(dev->irq);
-#endif
}

dev_kfree_skb (skb);

Note that there's another bad bug in your tree, caused by me (I fixed it
some weeks ago...). Sorry about that.

Index: linux/drivers/net/hp100.c
diff -u linux/drivers/net/hp100.c:1.1.3.1 linux/drivers/net/hp100.c:1.1.1.1.2.1
--- linux/drivers/net/hp100.c:1.1.3.1 Sat Dec 5 11:13:48 1998
+++ linux/drivers/net/hp100.c Fri Nov 20 00:15:23 1998
@@ -2798,7 +2798,7 @@
if ( hp100_inb( VG_LAN_CFG_1 ) & HP100_LINK_CABLE_ST ) break;
} while (time_after(time, jiffies));

- if ( time_before_eq(jiffies, time) ) /* no signal->no logout */
+ if ( time_after_eq(jiffies, time) ) /* no signal->no logout */
return 0;

/* Drop the VG Link by clearing the link up cmd and load addr.*/
@@ -2813,7 +2813,7 @@
} while(time_after(time, jiffies));

#ifdef HP100_DEBUG
- if (time_before_eq(jiffies, time))
+ if (time_after_eq(jiffies, time))
printk("hp100: %s: down_vg_link: Link does not go down?\n", dev->name);
#endif
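
For reference, the idiom behind the hp100 fix above, as a small stand-alone
demo (user space, not driver code; the macros mirror the kernel's
wraparound-safe jiffies comparisons and the loop shape follows the driver):
after a wait loop of this shape, "did we time out?" has to be
time_after_eq(jiffies, deadline); time_before_eq(jiffies, deadline) is also
true when the loop broke early because the condition came true, which was
exactly the bug.

#include <stdio.h>

/* same definitions as the kernel's jiffies comparison macros */
#define time_after(a,b)		((long)(b) - (long)(a) < 0)
#define time_after_eq(a,b)	((long)(a) - (long)(b) >= 0)
#define time_before_eq(a,b)	time_after_eq(b,a)

int main(void)
{
	unsigned long jiffies  = 1000;		/* pretend current time */
	unsigned long deadline = jiffies + 50;	/* "time" in the driver */
	int condition_met = 0;			/* stand-in for the status bit */

	do {
		if (condition_met)
			break;			/* success: jiffies < deadline here */
		jiffies++;			/* time marches on */
	} while (time_after(deadline, jiffies));

	if (time_after_eq(jiffies, deadline))
		printf("timed out\n");
	else
		printf("condition came true before the deadline\n");
	return 0;
}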

These other fixes of mine are also needed:

Index: linux/arch/i386/kernel/process.c
diff -u linux/arch/i386/kernel/process.c:1.1.3.1 linux/arch/i386/kernel/process.c:1.1.1.1.2.12
--- linux/arch/i386/kernel/process.c:1.1.3.1 Sat Dec 5 11:12:00 1998
+++ linux/arch/i386/kernel/process.c Sat Dec 5 12:22:23 1998
@@ -73,11 +73,11 @@

#ifndef __SMP__

+#ifdef CONFIG_APM
static void hard_idle(void)
{
while (!current->need_resched) {
if (boot_cpu_data.hlt_works_ok && !hlt_counter) {
-#ifdef CONFIG_APM
/* If the APM BIOS is not enabled, or there
is an error calling the idle routine, we
should hlt if possible. We need to check
@@ -87,38 +87,49 @@
if (!apm_do_idle() && !current->need_resched)
__asm__("hlt");
end_bh_atomic();
-#else
- __asm__("hlt");
-#endif
}
if (current->need_resched)
break;
schedule();
}
-#ifdef CONFIG_APM
apm_do_busy();
-#endif
}
+#endif

/*
* The idle loop on a uniprocessor i386..
*/
static int cpu_idle(void *unused)
{
+#ifdef CONFIG_APM
unsigned long start_idle = jiffies;
+#endif

/* endless idle loop with no priority at all */
+ current->priority = 0;
+ /* before changing this, see reschedule_idle() in kernel/sched.c */
+ current->counter = -4;
for (;;) {
- if (jiffies - start_idle > HARD_IDLE_TIMEOUT)
+#ifdef CONFIG_APM
+ if (jiffies - start_idle >= HARD_IDLE_TIMEOUT)
+ {
hard_idle();
+ start_idle = jiffies - HARD_IDLE_TIMEOUT;
+ }
else {
+#endif
if (boot_cpu_data.hlt_works_ok && !hlt_counter && !current->need_resched)
__asm__("hlt");
+
+#ifdef CONFIG_APM
}
- if (current->need_resched)
+ if (current->need_resched)
+ {
+ schedule();
start_idle = jiffies;
- current->policy = SCHED_YIELD;
- schedule();
+ } else
+#endif
+ schedule();
check_pgt_cache();
}
}
@@ -131,12 +142,13 @@

int cpu_idle(void *unused)
{
-
/* endless idle loop with no priority at all */
+ current->priority = 0;
+ /* before changing this, see reschedule_idle() in kernel/sched.c */
+ current->counter = -4;
while(1) {
if (current_cpu_data.hlt_works_ok && !hlt_counter && !current->need_resched)
__asm__("hlt");
- current->policy = SCHED_YIELD;
schedule();
check_pgt_cache();
}
Index: linux/kernel/sched.c
diff -u linux/kernel/sched.c:1.1.3.1 linux/kernel/sched.c:1.1.1.1.2.26
--- linux/kernel/sched.c:1.1.3.1 Sat Dec 5 11:17:28 1998
+++ linux/kernel/sched.c Sat Dec 5 12:22:26 1998
@@ -7,6 +7,8 @@
* 1996-12-23 Modified by Dave Grothe to fix bugs in semaphores and
* make semaphores SMP safe
* 1997-01-28 Modified by Finn Arne Gangstad to make timers scale better.
+ * 1998-11-19 Implemented schedule_timeout() and related stuff
+ * by Andrea Arcangeli
*/

/*
@@ -93,7 +96,7 @@

static inline void reschedule_idle(struct task_struct * p)
{
-
+ struct task_struct * this = current;
/*
* For SMP, we try to see if the CPU the task used
* to run on is idle..
@@ -130,15 +133,9 @@
}
#endif
#endif
- /*
- * If the current process is the idle one, we must reschedule ASAP.
- * Checking for p->counter >= current->counter we have the idle task
- * check implicit.
- * -arca
- */
- if (/* implicit !current->pid || */ p->counter >= current->counter ||
+ if (p->counter + p->priority > this->counter + this->priority + 3 ||
p->policy != SCHED_OTHER)
- current->need_resched = 1;
+ this->need_resched = 1;
}

/*
@@ -444,23 +445,6 @@
struct timer_list timer;
unsigned long expire;

- /*
- * PARANOID.
- */
- if (current->state == TASK_UNINTERRUPTIBLE)
- {
- printk(KERN_WARNING "schedule_timeout: task not interrutible "
- "from %p\n", __builtin_return_address(0));
- /*
- * We don' t want to interrupt a not interruptible task
- * risking to cause corruption. Better a a deadlock ;-).
- */
- timeout = MAX_SCHEDULE_TIMEOUT;
- }
-
- /*
- * Here we start for real.
- */
switch (timeout)
{
case MAX_SCHEDULE_TIMEOUT:
@@ -601,10 +585,12 @@

#ifdef __SMP__
next->has_cpu = 1;
- next->processor = this_cpu;
#endif

if (prev != next) {
+#ifdef __SMP__
+ next->processor = this_cpu;
+#endif
kstat.context_swtch++;
get_mmu_context(next);
switch_to(prev,next);
@@ -1628,7 +1614,7 @@
return 0;
}

- expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);
+ expire = timespec_to_jiffies(&t);

current->state = TASK_INTERRUPTIBLE;
expire = schedule_timeout(expire);

Index: linux/include/linux/time.h
diff -u linux/include/linux/time.h:1.1.3.1 linux/include/linux/time.h:1.1.1.1.2.16
--- linux/include/linux/time.h:1.1.3.1 Sat Dec 5 11:17:02 1998
+++ linux/include/linux/time.h Sun Nov 29 15:30:42 1998
@@ -13,43 +13,140 @@
#endif /* _STRUCT_TIMESPEC */

/*
- * Change timeval to jiffies, trying to avoid the
- * most obvious overflows..
+ * I reimplemented the big macros taking care of all possible overflows.
+ * I started hacking from the previous Linux code in this file.
+ * NOTE: The long macros in this file avoid _source_ code duplication
+ * and must be used only in the specific cases that will follow.
+ * NOTE: The code `retval = xsec + sec + PLUS_N' can't be exchanged
+ * with something like `retval = xsec + xxsec + sec + PLUS_N'.
+ * Andrea Arcangeli 27 Nov 1998
*
- * And some not so obvious.
+ * This code should avoid all possible overflows...
*
- * Note that we don't want to return MAX_LONG, because
- * for various timeout reasons we often end up having
- * to wait "jiffies+1" in order to guarantee that we wait
- * at _least_ "jiffies" - so "jiffies+1" had better still
- * be positive.
+ * And some not so obvious.
*/
-#define MAX_JIFFY_OFFSET ((~0UL >> 1)-1)
+
+#define MAX_JIFFY_OFFSET (~0UL >> 1)
+
+#define TIME_TSTRUCT_TO_JIFFIES(TIMEX, PLUS_N, LITTLE_FIELD) \
+ unsigned long sec = value->tv_sec, xsec = value->LITTLE_FIELD; \
+ unsigned long xsec_tmp; \
+ long retval; \
+ \
+ if (sec >= (MAX_JIFFY_OFFSET-PLUS_N) / HZ) \
+ return MAX_JIFFY_OFFSET; \
+ sec *= HZ; \
+ \
+ xsec_tmp = xsec + (TIMEX+HZ-1)/HZ - 1; \
+ if (xsec_tmp > xsec) \
+ xsec = xsec_tmp; \
+ xsec /= (TIMEX+HZ-1)/HZ; \
+ if (xsec >= (MAX_JIFFY_OFFSET-PLUS_N)) \
+ return MAX_JIFFY_OFFSET; \
+ \
+ retval = xsec + sec + PLUS_N; \
+ if (retval < 0) \
+ return MAX_JIFFY_OFFSET; \
+ else if (!PLUS_N && !retval) \
+ return value->tv_sec || value->LITTLE_FIELD; \
+ \
+ return retval;

-static __inline__ unsigned long
-timespec_to_jiffies(struct timespec *value)
+#define TIME_JIFFIES_TO_TSTRUCT(TIMEX, LITTLE_FIELD) \
+ value->tv_sec = jiffies / HZ; \
+ value->LITTLE_FIELD = (jiffies % HZ) * (TIMEX / HZ);
+
+/*
+ * change timespec to jiffies
+ */
+static __inline__ long
+timespec_to_jiffies(const struct timespec *value)
{
- unsigned long sec = value->tv_sec;
- long nsec = value->tv_nsec;
+ TIME_TSTRUCT_TO_JIFFIES(1000000000UL, 0, tv_nsec);
+}

- if (sec >= (MAX_JIFFY_OFFSET / HZ))
- return MAX_JIFFY_OFFSET;
- nsec += 1000000000L / HZ - 1;
- nsec /= 1000000000L / HZ;
- return HZ * sec + nsec;
+/*
+ * change timespec to jiffies plus one
+ */
+static __inline__ long
+timespec_to_jiffies_plus_one(const struct timespec *value)
+{
+ TIME_TSTRUCT_TO_JIFFIES(1000000000UL, 1, tv_nsec);
}

static __inline__ void
-jiffies_to_timespec(unsigned long jiffies, struct timespec *value)
+jiffies_to_timespec(const unsigned long jiffies, struct timespec *value)
{
- value->tv_nsec = (jiffies % HZ) * (1000000000L / HZ);
- value->tv_sec = jiffies / HZ;
+ TIME_JIFFIES_TO_TSTRUCT(1000000000L, tv_nsec);
}
-
+
struct timeval {
time_t tv_sec; /* seconds */
suseconds_t tv_usec; /* microseconds */
};
+
+static __inline__ long
+timeval_to_jiffies(const struct timeval *value)
+{
+ TIME_TSTRUCT_TO_JIFFIES(1000000UL, 0, tv_usec)
+}
+
+static __inline__ long
+timeval_to_jiffies_plus_one(const struct timeval *value)
+{
+ TIME_TSTRUCT_TO_JIFFIES(1000000UL, 1, tv_usec)
+}
+
+static __inline__ void
+jiffies_to_timeval(const unsigned long jiffies, struct timeval *value)
+{
+ TIME_JIFFIES_TO_TSTRUCT(1000000L, tv_usec);
+}
+
+#define TIME_LITTLE_XSEC_TO_JIFFIES(XTIME, PLUS_N) \
+ if (xsec >= (MAX_JIFFY_OFFSET-PLUS_N) / ((HZ+(XTIME-1))/XTIME)) \
+ return MAX_JIFFY_OFFSET; \
+ xsec = (xsec*HZ+(XTIME-1))/XTIME + PLUS_N;
+
+#define TIME_BIG_XSEC_TO_JIFFIES(XTIME, PLUS_N) \
+ unsigned long xsec_tmp, xsec_orig = xsec; \
+ xsec_tmp = xsec + (XTIME+HZ-1)/HZ - 1; \
+ if (xsec_tmp > xsec) \
+ xsec = xsec_tmp; \
+ xsec = xsec / ((XTIME+HZ-1)/HZ) + PLUS_N; \
+ \
+ if ((signed) xsec < 0) \
+ return MAX_JIFFY_OFFSET; \
+ else if (!PLUS_N && !xsec) \
+ return xsec_orig != 0;
+
+/*
+ * change msec to jiffies
+ */
+static __inline__ long
+msec_to_jiffies(unsigned long xsec)
+{
+#if HZ >= 1000
+ TIME_LITTLE_XSEC_TO_JIFFIES(1000, 0);
+#else
+ TIME_BIG_XSEC_TO_JIFFIES(1000, 0);
+#endif
+ return xsec;
+}
+
+/*
+ * change msec to jiffies plus one
+ */
+static __inline__ long
+msec_to_jiffies_plus_one(unsigned long xsec)
+{
+#if HZ >= 1000
+ TIME_LITTLE_XSEC_TO_JIFFIES(1000, 1);
+#else
+ TIME_BIG_XSEC_TO_JIFFIES(1000, 1);
+#endif
+ return xsec;
+}

struct timezone {
int tz_minuteswest; /* minutes west of Greenwich */
Index: linux/kernel/signal.c
diff -u linux/kernel/signal.c:1.1.3.1 linux/kernel/signal.c:1.1.1.1.2.5
--- linux/kernel/signal.c:1.1.3.1 Sat Dec 5 11:17:28 1998
+++ linux/kernel/signal.c Sat Dec 5 11:59:05 1998
@@ -738,8 +738,7 @@

timeout = MAX_SCHEDULE_TIMEOUT;
if (uts)
- timeout = (timespec_to_jiffies(&ts)
- + (ts.tv_sec || ts.tv_nsec));
+ timeout = timespec_to_jiffies(&ts);

current->state = TASK_INTERRUPTIBLE;
timeout = schedule_timeout(timeout);
Index: linux/fs/select.c
diff -u linux/fs/select.c:1.1.3.1 linux/fs/select.c:1.1.1.1.2.6
--- linux/fs/select.c:1.1.3.1 Sat Dec 5 11:14:43 1998
+++ linux/fs/select.c Sat Dec 5 11:58:48 1998
@@ -17,7 +17,6 @@

#include <asm/uaccess.h>

-#define ROUND_UP(x,y) (((x)+(y)-1)/(y))
#define DEFAULT_POLLMASK (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)

/*
@@ -212,20 +211,17 @@
long timeout;
int ret;

- timeout = MAX_SCHEDULE_TIMEOUT;
if (tvp) {
- time_t sec, usec;
+ struct timeval ktvp;

if ((ret = verify_area(VERIFY_READ, tvp, sizeof(*tvp)))
- || (ret = __get_user(sec, &tvp->tv_sec))
- || (ret = __get_user(usec, &tvp->tv_usec)))
+ || (ret = __get_user(ktvp.tv_sec, &tvp->tv_sec))
+ || (ret = __get_user(ktvp.tv_usec, &tvp->tv_usec)))
goto out_nofds;

- if ((unsigned long) sec < MAX_SELECT_SECONDS) {
- timeout = ROUND_UP(usec, 1000000/HZ);
- timeout += sec * (unsigned long) HZ;
- }
- }
+ timeout = timeval_to_jiffies(&ktvp);
+ } else
+ timeout = MAX_SCHEDULE_TIMEOUT;

ret = -ENOMEM;
fds = (fd_set_buffer *) __get_free_page(GFP_KERNEL);
@@ -247,14 +243,10 @@
ret = do_select(n, fds, &timeout);

if (tvp && !(current->personality & STICKY_TIMEOUTS)) {
- time_t sec = 0, usec = 0;
- if (timeout) {
- sec = timeout / HZ;
- usec = timeout % HZ;
- usec *= (1000000/HZ);
- }
- put_user(sec, &tvp->tv_sec);
- put_user(usec, &tvp->tv_usec);
+ struct timeval ktvp;
+ jiffies_to_timeval(timeout, &ktvp);
+ put_user(ktvp.tv_sec, &tvp->tv_sec);
+ put_user(ktvp.tv_usec, &tvp->tv_usec);
}

if (ret < 0)
@@ -334,7 +326,7 @@
if (timeout < 0)
timeout = MAX_SCHEDULE_TIMEOUT;
else if (timeout)
- timeout = (timeout*HZ+999)/1000+1;
+ timeout = msec_to_jiffies(timeout);

err = -ENOMEM;
if (timeout) {

The time patch is needed to fix the poll() timeout overflow properly. The
first part of the patches is needed to fix the ugly
idle-task-not-instant-rescheduling problem...
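
To make the overflow concrete, here is a small stand-alone demo (user space,
not kernel code; HZ=100 and the sample timeout are just illustrative). With
the old sys_poll() math the intermediate timeout*HZ stops fitting in a
32-bit long for timeouts above about 21474836 msec (roughly six hours),
while a divide-first conversion like the one in the patched time.h stays in
range:

#include <stdio.h>
#include <stdint.h>

#define HZ 100				/* i386 default; an assumption here */

int main(void)
{
	long ms = 32000000L;		/* ~8.9 hour poll() timeout in msec */

	/* intermediate value of the old conversion (timeout*HZ+999)/1000+1 */
	int64_t product = (int64_t)ms * HZ + 999;
	if (product > INT32_MAX)
		printf("timeout*HZ+999 = %lld does not fit a 32-bit long;\n"
		       "the old conversion wraps negative on i386\n",
		       (long long)product);

	/* roughly what the patched msec_to_jiffies() computes instead:
	 * divide first (and clamp at MAX_JIFFY_OFFSET for absurd values) */
	unsigned long per_jiffy = (1000 + HZ - 1) / HZ;	/* msec per jiffy */
	unsigned long safe = ((unsigned long)ms + per_jiffy - 1) / per_jiffy;
	printf("overflow-safe conversion: %lu jiffies\n", safe);
	return 0;
}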

There's also a lot of other good stuff (I think ;) in my arca-48; you
could take a look at it (it's against your 131-ac4).

Andrea Arcangeli
