[PATCH] kflushd and sync (2.3.18ac5)

Daniel J Blueman (daniel.j.blueman@stud.umist.ac.uk)
Thu, 23 Sep 1999 09:48:52 +0100


This is a multi-part message in MIME format.

------=_NextPart_000_000B_01BF05A8.D7BE97E0
Content-Type: text/plain;
charset="iso-8859-1"
Content-Transfer-Encoding: 7bit

Here's my patch against 2.3.18ac5. It rewrites bdflush() and
sync_old_buffers() to give much lower processor overhead. It seems to work
well, and I've had no filesystem corruption in the week or so that I've
been using it.

Comments/suggestions are welcome.

Dan

__________________________
Daniel J Blueman - daniel.j.blueman@stud.umist.ac.uk
Undergraduate - BSc Computing Science
UMIST university - Manchester

------=_NextPart_000_000B_01BF05A8.D7BE97E0
Content-Type: application/octet-stream;
name="buffer.c.diff"
Content-Transfer-Encoding: quoted-printable
Content-Disposition: attachment;
filename="buffer.c.diff"

--- buffer.orig.c Thu Sep 23 02:12:46 1999=0A=
+++ buffer.c Thu Sep 23 09:19:40 1999=0A=
@@ -24,6 +24,11 @@=0A=
* - RMK=0A=
*/=0A=
=0A=
+/*=0A=
+ * Rewrote bdflush() and sync_old_buffers() for improved performance=0A=
+ * - Daniel J Blueman 9/1999=0A=
+ */=0A=
+=0A=
/* Thread it... -DaveM */=0A=
=0A=
#include <linux/sched.h>=0A=
@@ -1235,7 +1240,7 @@=0A=
=0A=
/*=0A=
* subtle. We release buffer-heads only if this is=0A=
- * the 'final' flushpage. We have invalidated the get_block=0A=
+ * the 'final' flushpage. We have invalidated the bmap=0A=
* cached value unconditionally, so real IO is not=0A=
* possible anymore.=0A=
*=0A=
@@ -1738,7 +1743,7 @@=0A=
*/=0A=
=0A=
int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], =0A=
- kdev_t dev, unsigned long b[], int size)=0A=
+ kdev_t dev, unsigned long b[], int size, int bmap)=0A=
{=0A=
int err;=0A=
int length;=0A=
@@ -1861,7 +1866,7 @@=0A=
return err;=0A=
=0A=
error:=0A=
- /* We got an error allocating the bh'es. Just free the current=0A=
+ /* We got an error allocating the bh'es. Just free the current=0A=
buffer_heads and exit. */=0A=
spin_lock(&unused_list_lock);=0A=
for (i =3D bhind; --i >=3D 0; ) {=0A=
@@ -1955,7 +1960,7 @@=0A=
=0A=
/*=0A=
* Generic "read page" function for block devices that have the normal=0A=
- * get_block functionality. This is most of the block device =
filesystems.=0A=
+ * bmap functionality. This is most of the block device filesystems.=0A=
* Reads the page asynchronously --- the unlock_buffer() and=0A=
* mark_buffer_uptodate() functions propagate buffer state into the=0A=
* page struct once IO has completed.=0A=
@@ -2225,6 +2230,7 @@=0A=
* response to dirty buffers. Once this process is activated, we write =
back=0A=
* a limited number of buffers to the disks and then go back to sleep =
again.=0A=
*/=0A=
+=0A=
static DECLARE_WAIT_QUEUE_HEAD(bdflush_wait);=0A=
static DECLARE_WAIT_QUEUE_HEAD(bdflush_done);=0A=
struct task_struct *bdflush_tsk =3D 0;=0A=
@@ -2251,46 +2257,46 @@=0A=
=0A=
static int sync_old_buffers(void)=0A=
{=0A=
- int nlist;=0A=
+ struct buffer_head *bh, *next;=0A=
+ int nr, list;=0A=
=0A=
lock_kernel();=0A=
sync_supers(0);=0A=
sync_inodes(0);=0A=
unlock_kernel();=0A=
=0A=
- for(nlist =3D BUF_LOCKED; nlist <=3D BUF_DIRTY; nlist++) {=0A=
- struct buffer_head *bh;=0A=
- repeat:=0A=
- spin_lock(&lru_list_lock);=0A=
- bh =3D lru_list[nlist];=0A=
- if(bh) {=0A=
- struct buffer_head *next;=0A=
- int i;=0A=
- for (i =3D nr_buffers_type[nlist]; i-- > 0; bh =3D next) {=0A=
- next =3D bh->b_next_free;=0A=
+ for (list =3D BUF_LOCKED; list <=3D BUF_DIRTY; list++) {=0A=
+ for (;;) {=0A=
+ spin_lock(&lru_list_lock);=0A=
+ next =3D lru_list[list];=0A=
+ nr =3D nr_buffers_type[list];=0A=
=0A=
- /* If the buffer is not on the proper list,=0A=
- * then refile it.=0A=
- */=0A=
- if ((nlist =3D=3D BUF_DIRTY &&=0A=
- (!buffer_dirty(bh) && !buffer_locked(bh))) ||=0A=
- (nlist =3D=3D BUF_LOCKED && !buffer_locked(bh))) {=0A=
- __refile_buffer(bh);=0A=
- continue;=0A=
- }=0A=
- =0A=
- if (buffer_locked(bh) || !buffer_dirty(bh))=0A=
- continue;=0A=
+ while (nr-- > 0) {=0A=
+ bh =3D next;=0A=
+ next =3D next->b_next_free;=0A=
=0A=
- /* OK, now we are committed to write it out. */=0A=
- atomic_inc(&bh->b_count);=0A=
- spin_unlock(&lru_list_lock);=0A=
- ll_rw_block(WRITE, 1, &bh);=0A=
- atomic_dec(&bh->b_count);=0A=
- goto repeat;=0A=
+ /* We don't touch locked buffers */=0A=
+ if (!buffer_locked(bh)) {=0A=
+ /* Check if buffer is suitable for writing out */=0A=
+ if (buffer_dirty(bh))=0A=
+ goto write;=0A=
+=0A=
+ /* Refile buffer if in the wrong list */=0A=
+ if (list =3D=3D BUF_LOCKED || (list =3D=3D BUF_DIRTY && =
!buffer_dirty(bh)))=0A=
+ __refile_buffer(bh);=0A=
+ }=0A=
}=0A=
+=0A=
+ spin_unlock(&lru_list_lock);=0A=
+ break;=0A=
+=0A=
+ write:=0A=
+ /* Commit buffer to block device */=0A=
+ atomic_inc(&bh->b_count);=0A=
+ spin_unlock(&lru_list_lock);=0A=
+ ll_rw_block(WRITE, 1, &bh);=0A=
+ atomic_dec(&bh->b_count);=0A=
}=0A=
- spin_unlock(&lru_list_lock);=0A=
}=0A=
run_task_queue(&tq_disk);=0A=
return 0;=0A=
@@ -2354,91 +2360,66 @@=0A=
* the syscall above, but now we launch it ourselves internally with=0A=
* kernel_thread(...) directly after the first thread in init/main.c=0A=
*/=0A=
+=0A=
int bdflush(void * unused) =0A=
{=0A=
- /*=0A=
- * We have a bare-bones task_struct, and really should fill=0A=
- * in a few more things so "top" and /proc/2/{exe,root,cwd}=0A=
- * display semi-sane things. Not real crucial though... =0A=
- */=0A=
+ struct buffer_head *bh, *next;=0A=
+ int nr, written, list;=0A=
=0A=
+ /* Setup thread information */=0A=
current->session =3D 1;=0A=
current->pgrp =3D 1;=0A=
sprintf(current->comm, "kflushd");=0A=
bdflush_tsk =3D current;=0A=
=0A=
for (;;) {=0A=
- int nlist;=0A=
-=0A=
CHECK_EMERGENCY_SYNC=0A=
=0A=
- for(nlist =3D BUF_LOCKED; nlist <=3D BUF_DIRTY; nlist++) {=0A=
- int nr, major, written =3D 0;=0A=
- struct buffer_head *next;=0A=
+ for (list =3D BUF_LOCKED; list <=3D BUF_DIRTY; list++) {=0A=
+ written =3D 0;=0A=
=0A=
- repeat:=0A=
- spin_lock(&lru_list_lock);=0A=
- next =3D lru_list[nlist];=0A=
- nr =3D nr_buffers_type[nlist];=0A=
- while (nr-- > 0) {=0A=
- struct buffer_head *bh =3D next;=0A=
+ do {=0A=
+ spin_lock(&lru_list_lock);=0A=
+ next =3D lru_list[list];=0A=
+ nr =3D nr_buffers_type[list];=0A=
=0A=
- next =3D next->b_next_free;=0A=
- =0A=
- /* If the buffer is not on the correct list,=0A=
- * then refile it.=0A=
- */=0A=
- if ((nlist =3D=3D BUF_DIRTY &&=0A=
- (!buffer_dirty(bh) && !buffer_locked(bh))) ||=0A=
- (nlist =3D=3D BUF_LOCKED && !buffer_locked(bh))) {=0A=
- __refile_buffer(bh);=0A=
- continue;=0A=
+ while (nr-- > 0) {=0A=
+ bh =3D next;=0A=
+ next =3D next->b_next_free;=0A=
+=0A=
+ /* We don't touch locked buffers */=0A=
+ if (!buffer_locked(bh)) {=0A=
+ /* Check if buffer is suitable for writing out */=0A=
+ if (buffer_dirty(bh) && (!time_before(jiffies, bh->b_flushtime) =
|| too_many_dirty_buffers))=0A=
+ goto write;=0A=
+=0A=
+ /* Refile buffer if in the wrong list */=0A=
+ if (list =3D=3D BUF_LOCKED || (list =3D=3D BUF_DIRTY && =
!buffer_dirty(bh)))=0A=
+ __refile_buffer(bh);=0A=
+ }=0A=
}=0A=
=0A=
- /* If we aren't in panic mode, don't write out too much=0A=
- * at a time. Also, don't write out buffers we don't=0A=
- * really have to write out yet..=0A=
- */=0A=
- if (!too_many_dirty_buffers) {=0A=
- if (written > bdf_prm.b_un.ndirty)=0A=
- break;=0A=
- if (time_before(jiffies, bh->b_flushtime))=0A=
- continue;=0A=
- }=0A=
-=0A=
- if (buffer_locked(bh) || !buffer_dirty(bh))=0A=
- continue;=0A=
-=0A=
- major =3D MAJOR(bh->b_dev);=0A=
- written++;=0A=
+ spin_unlock(&lru_list_lock);=0A=
+ break;=0A=
=0A=
- /*=0A=
- * For the loop major we can try to do asynchronous writes,=0A=
- * but we have to guarantee that we're making some progress..=0A=
- */=0A=
+ write:=0A=
+ /* Commit buffer to block device */=0A=
atomic_inc(&bh->b_count);=0A=
spin_unlock(&lru_list_lock);=0A=
ll_rw_block(WRITE, 1, &bh);=0A=
atomic_dec(&bh->b_count);=0A=
- goto repeat;=0A=
- }=0A=
- spin_unlock(&lru_list_lock);=0A=
+ written++;=0A=
+ } while (written < bdf_prm.b_un.ndirty || too_many_dirty_buffers);=0A=
}=0A=
run_task_queue(&tq_disk);=0A=
wake_up(&bdflush_done);=0A=
- =0A=
- /*=0A=
- * If there are still a lot of dirty buffers around,=0A=
- * skip the sleep and flush some more. Otherwise, we=0A=
- * sleep for a while and mark us as not being in panic=0A=
- * mode..=0A=
- */=0A=
+=0A=
if (!too_many_dirty_buffers || nr_buffers_type[BUF_DIRTY] < =
bdf_prm.b_un.ndirty) {=0A=
too_many_dirty_buffers =3D 0;=0A=
spin_lock_irq(&current->sigmask_lock);=0A=
flush_signals(current);=0A=
spin_unlock_irq(&current->sigmask_lock);=0A=
- interruptible_sleep_on_timeout(&bdflush_wait, 5*HZ);=0A=
+ interruptible_sleep_on_timeout(&bdflush_wait, 5 * HZ);=0A=
}=0A=
}=0A=
}=0A=

------=_NextPart_000_000B_01BF05A8.D7BE97E0--

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/