[PATCH] isofs meta-data beyond 4GB

From: Paul Serice
Date: Fri May 21 2004 - 20:08:02 EST


The ISO 9660 standard allows meta-data to be stored at almost any
arbitrary byte on the file system. The current inode scheme uses the
byte offset as the inode value making it easy to find the underlying
block and block offset.

This scheme is subject to obvious integer overflow problems that
prevents it from being able to reach meta-data beyond the 4GB
boundary. Looking back through the archives, this problem was
anticipated but discounted because mkisofs puts its meta-data near the
beginning of the file system.

However, there are at least two common scenarios where meta-data must
be located beyond the 4GB boundary. First, growisofs needs to do this
when it merges a new session with an old session that ended beyond the
4GB boundary. The following URL has more information:

http://fy.chalmers.se/~appro/linux/DVD+RW/#isofs4gb

Second (and what prompted this patch), I've written two programs that
together allow arbitrarily large backups to be made to a set of DVDs
without the need for any intermediate files (in the same manner as
when working with tapes). In order to use all the space on a DVD, I'm
forced to write the meta-data beyond the current 4GB inode boundary
because I don't have all it until I reach the end of the file system.
The following URL has more information:

http://www.serice.net/shunt/

To support these types of DVDs, the inode scheme for isofs must be
changed. The patch I'm submitting for review provides one such
comprehensive inode scheme. It assigns inode numbers sequentially
starting with 1 for the root inode. It keeps a mapping for each inode
that is indexed both by the inode number and by the block number and
block offset. The indexes are implemented using two rbtrees and
are protected by a reader-writer spin lock.

Thanks
Paul Serice


========================================================================


diff -uprN -X dontdiff linux-2.6.6/fs/isofs/dir.c linux-2.6.6-isofs/fs/isofs/dir.c
--- linux-2.6.6/fs/isofs/dir.c 2004-04-03 21:37:59.000000000 -0600
+++ linux-2.6.6-isofs/fs/isofs/dir.c 2004-05-19 08:24:27.698680016 -0500
@@ -115,6 +115,7 @@ static int do_isofs_readdir(struct inode
char *p = NULL; /* Quiet GCC */
struct iso_directory_record *de;
struct isofs_sb_info *sbi = ISOFS_SB(inode->i_sb);
+ struct isofs_inode_map *inode_map = &sbi->s_inode_map;

offset = filp->f_pos & (bufsize - 1);
block = filp->f_pos >> bufbits;
@@ -129,8 +130,17 @@ static int do_isofs_readdir(struct inode
}

de = (struct iso_directory_record *) (bh->b_data + offset);
- if (first_de)
- inode_number = (bh->b_blocknr << bufbits) + offset;
+ if (first_de) {
+ inode_number =
+ isofs_inode_map_index_by_phy(inode_map,
+ bh->b_blocknr,
+ offset);
+ if (inode_number == 0) {
+ printk(KERN_ERR "iso9660: Unable to index "
+ "inode!\n");
+ return 0;
+ }
+ }

de_len = *(unsigned char *) de;

diff -uprN -X dontdiff linux-2.6.6/fs/isofs/inode.c linux-2.6.6-isofs/fs/isofs/inode.c
--- linux-2.6.6/fs/isofs/inode.c 2004-05-19 07:55:33.751280088 -0500
+++ linux-2.6.6-isofs/fs/isofs/inode.c 2004-05-20 05:53:38.565996600 -0500
@@ -7,6 +7,7 @@
* 1995 Mark Dobie - allow mounting of some weird VideoCDs and PhotoCDs.
* 1997 Gordon Chaffee - Joliet CDs
* 1998 Eric Lammerts - ISO 9660 Level 3
+ * 2004 Paul Serice - Comprehensive Inode Scheme
*/

#include <linux/config.h>
@@ -585,6 +586,7 @@ static int isofs_fill_super(struct super
return -ENOMEM;
s->s_fs_info = sbi;
memset(sbi, 0, sizeof(struct isofs_sb_info));
+ isofs_inode_map_init(&sbi->s_inode_map);

if (!parse_options((char *) data, &opt))
goto out_freesbi;
@@ -738,19 +740,18 @@ root_found:
/* Set this for reference. Its not currently used except on write
which we don't have .. */

- /* RDE: data zone now byte offset! */
-
- first_data_zone = ((isonum_733 (rootp->extent) +
- isonum_711 (rootp->ext_attr_length))
- << sbi->s_log_zone_size);
+ first_data_zone = isonum_733 (rootp->extent) +
+ isonum_711 (rootp->ext_attr_length);
sbi->s_firstdatazone = first_data_zone;
+ isofs_inode_map_set_root_inode(&sbi->s_inode_map,
+ /* block */ sbi->s_firstdatazone,
+ /* offset */ 0);
#ifndef BEQUIET
printk(KERN_DEBUG "Max size:%ld Log zone size:%ld\n",
sbi->s_max_size,
1UL << sbi->s_log_zone_size);
printk(KERN_DEBUG "First datazone:%ld Root inode number:%ld\n",
- sbi->s_firstdatazone >> sbi->s_log_zone_size,
- sbi->s_firstdatazone);
+ sbi->s_firstdatazone, 1);
if(sbi->s_high_sierra)
printk(KERN_DEBUG "Disc in High Sierra format.\n");
#endif
@@ -767,9 +768,8 @@ root_found:
pri = (struct iso_primary_descriptor *) sec;
rootp = (struct iso_directory_record *)
pri->root_directory_record;
- first_data_zone = ((isonum_733 (rootp->extent) +
- isonum_711 (rootp->ext_attr_length))
- << sbi->s_log_zone_size);
+ first_data_zone = isonum_733 (rootp->extent) +
+ isonum_711 (rootp->ext_attr_length);
}

/*
@@ -835,7 +835,7 @@ root_found:
* the s_rock flag. Once we have the final s_rock value,
* we then decide whether to use the Joliet descriptor.
*/
- inode = iget(s, sbi->s_firstdatazone);
+ inode = iget(s, 1);

/*
* If this disk has both Rock Ridge and Joliet on it, then we
@@ -853,8 +853,11 @@ root_found:
sbi->s_firstdatazone = first_data_zone;
printk(KERN_DEBUG
"ISOFS: changing to secondary root\n");
+ isofs_inode_map_set_root_inode(&sbi->s_inode_map,
+ /* block */ sbi->s_firstdatazone,
+ /* offset */ 0);
iput(inode);
- inode = iget(s, sbi->s_firstdatazone);
+ inode = iget(s, 1);
}
}

@@ -1044,7 +1047,7 @@ static int isofs_get_block(struct inode
return isofs_get_blocks(inode, iblock, &bh_result, 1) ? 0 : -EIO;
}

-static int isofs_bmap(struct inode *inode, int block)
+static int isofs_bmap(struct inode *inode, sector_t block)
{
struct buffer_head dummy;
int error;
@@ -1097,22 +1100,31 @@ static inline void test_and_set_gid(gid_

static int isofs_read_level3_size(struct inode * inode)
{
- unsigned long f_pos = inode->i_ino;
unsigned long bufsize = ISOFS_BUFFER_SIZE(inode);
- int high_sierra = ISOFS_SB(inode->i_sb)->s_high_sierra;
+ struct isofs_sb_info* sbi = ISOFS_SB(inode->i_sb);
+ int high_sierra = sbi->s_high_sierra;
struct buffer_head * bh = NULL;
- unsigned long block, offset;
+ unsigned long block, offset, block_offset;
int i = 0;
int more_entries = 0;
struct iso_directory_record * tmpde = NULL;
struct iso_inode_info *ei = ISOFS_I(inode);
+ struct isofs_inode_map *map = &sbi->s_inode_map;
+ struct isofs_inode_map_node *map_node;
+
+ /* Map the inode number to its block and offset. */
+ map_node = isofs_inode_map_find_by_ino(map, inode->i_ino);
+ if (map_node == NULL) {
+ printk(KERN_ERR "iso9660: Internal Error: Unable to find "
+ "inode (%lu) in map.\n", inode->i_ino);
+ return 0;
+ }
+ block = map_node->block;
+ offset = map_node->block_offset;

inode->i_size = 0;
ei->i_next_section_ino = 0;

- block = f_pos >> ISOFS_BUFFER_BITS(inode);
- offset = f_pos & (bufsize-1);
-
do {
struct iso_directory_record * de;
unsigned int de_len;
@@ -1128,12 +1140,12 @@ static int isofs_read_level3_size(struct
if (de_len == 0) {
brelse(bh);
bh = NULL;
- f_pos = (f_pos + ISOFS_BLOCK_SIZE) & ~(ISOFS_BLOCK_SIZE - 1);
- block = f_pos >> ISOFS_BUFFER_BITS(inode);
+ ++block;
offset = 0;
continue;
}

+ block_offset = offset;
offset += de_len;

/* Make sure we have a full directory entry */
@@ -1159,12 +1171,15 @@ static int isofs_read_level3_size(struct
}

inode->i_size += isonum_733(de->size);
- if (i == 1)
- ei->i_next_section_ino = f_pos;
+ if (i == 1) {
+ ei->i_next_section_ino =
+ isofs_inode_map_index_by_phy(map,
+ block,
+ block_offset);
+ }

more_entries = de->flags[-high_sierra] & 0x80;

- f_pos += de_len;
i++;
if(i > 100)
goto out_toomany;
@@ -1190,8 +1205,8 @@ out_noread:
out_toomany:
printk(KERN_INFO "isofs_read_level3_size: "
"More than 100 file sections ?!?, aborting...\n"
- "isofs_read_level3_size: inode=%lu ino=%lu\n",
- inode->i_ino, f_pos);
+ "isofs_read_level3_size: inode=%lu\n",
+ inode->i_ino);
goto out;
}

@@ -1200,7 +1215,7 @@ static void isofs_read_inode(struct inod
struct super_block *sb = inode->i_sb;
struct isofs_sb_info *sbi = ISOFS_SB(sb);
unsigned long bufsize = ISOFS_BUFFER_SIZE(inode);
- int block = inode->i_ino >> ISOFS_BUFFER_BITS(inode);
+ sector_t block;
int high_sierra = sbi->s_high_sierra;
struct buffer_head * bh = NULL;
struct iso_directory_record * de;
@@ -1209,12 +1224,21 @@ static void isofs_read_inode(struct inod
unsigned long offset;
int volume_seq_no, i;
struct iso_inode_info *ei = ISOFS_I(inode);
+ struct isofs_inode_map *map = &sbi->s_inode_map;
+ struct isofs_inode_map_node *map_node;
+
+ /* Map the inode number to its block and offset. */
+ map_node = isofs_inode_map_find_by_ino(map, inode->i_ino);
+ if (map_node == NULL) {
+ goto fail;
+ }
+ block = map_node->block;
+ offset = map_node->block_offset;

bh = sb_bread(inode->i_sb, block);
if (!bh)
goto out_badread;

- offset = (inode->i_ino & (bufsize - 1));
de = (struct iso_directory_record *) (bh->b_data + offset);
de_len = *(unsigned char *) de;

@@ -1424,11 +1448,17 @@ static struct super_block *isofs_get_sb(
return get_sb_bdev(fs_type, flags, dev_name, data, isofs_fill_super);
}

+static void isofs_kill_sb(struct super_block *sb)
+{
+ isofs_inode_map_destroy(&ISOFS_SB(sb)->s_inode_map);
+ kill_block_super(sb);
+}
+
static struct file_system_type iso9660_fs_type = {
.owner = THIS_MODULE,
.name = "iso9660",
.get_sb = isofs_get_sb,
- .kill_sb = kill_block_super,
+ .kill_sb = isofs_kill_sb,
.fs_flags = FS_REQUIRES_DEV,
};

@@ -1437,20 +1467,25 @@ static int __init init_iso9660_fs(void)
int err = init_inodecache();
if (err)
goto out;
+ err = init_inode_map_node_cache();
+ if (err)
+ goto out1;
#ifdef CONFIG_ZISOFS
err = zisofs_init();
if (err)
- goto out1;
+ goto out2;
#endif
err = register_filesystem(&iso9660_fs_type);
if (err)
- goto out2;
+ goto out3;
return 0;
-out2:
+out3:
#ifdef CONFIG_ZISOFS
zisofs_cleanup();
-out1:
+out2:
#endif
+ destroy_inode_map_node_cache();
+out1:
destroy_inodecache();
out:
return err;
@@ -1462,6 +1497,7 @@ static void __exit exit_iso9660_fs(void)
#ifdef CONFIG_ZISOFS
zisofs_cleanup();
#endif
+ destroy_inode_map_node_cache();
destroy_inodecache();
}

diff -uprN -X dontdiff linux-2.6.6/fs/isofs/inode_map.c linux-2.6.6-isofs/fs/isofs/inode_map.c
--- linux-2.6.6/fs/isofs/inode_map.c 1969-12-31 18:00:00.000000000 -0600
+++ linux-2.6.6-isofs/fs/isofs/inode_map.c 2004-05-20 06:32:06.573126184 -0500
@@ -0,0 +1,411 @@
+#include <linux/buffer_head.h>
+#include <linux/iso_fs.h>
+#include <linux/rbtree.h>
+
+/*
+ * File meta-data can appear at almost any byte location on an ISO
+ * 9660 file system. The previous version of the linux iso9660 file
+ * system addressed this problem by using the byte offset (relative to
+ * the beginning of the file system) as the inode number. It then
+ * used the inode number to locate the meta-data. This worked well
+ * until DVDs broke the 4GB barrier causing the byte offset to
+ * overflow its "unsigned long" on architectures where an unsigned
+ * long is only 32-bits wide.
+ *
+ * What follows is designed to allow access to all meta-data
+ * regardless of its location on the file system by assigning a
+ * simple, sequentially allocated inode number. It then creates a
+ * mapping between the inode number and the block and block_offset
+ * where the meta-data can be found. The mapping is stored in
+ * sbi->s_inode_map which uses two standard linux rbtrees to index by
+ * physical layout of the media and by the inode number.
+ *
+ * Where you see "inode_map_node" below, read it as "inode-map node,"
+ * i.e., a node for the rbtree that maps inodes to physical layout. I
+ * am not altogether happy with the name, but it leaves room in the
+ * name space if other mappings are needed.
+ */
+
+static kmem_cache_t *isofs_inode_map_node_cachep;
+
+int init_inode_map_node_cache(void)
+{
+ isofs_inode_map_node_cachep =
+ kmem_cache_create("isofs_inode_map_node_cache",
+ sizeof(struct isofs_inode_map_node),
+ /* offset */ 0,
+ SLAB_HWCACHE_ALIGN | SLAB_RECLAIM_ACCOUNT,
+ /* ctor */ NULL,
+ /* dtor */ NULL);
+
+ if (isofs_inode_map_node_cachep == NULL) {
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+void destroy_inode_map_node_cache(void)
+{
+ if (kmem_cache_destroy(isofs_inode_map_node_cachep)) {
+ printk(KERN_INFO "isofs: Unable to destroy"
+ "node cache for the inode map.\n");
+ }
+}
+
+static inline void
+isofs_inode_map_node_destroy_unlocked(struct isofs_inode_map_node *node)
+{
+ kmem_cache_free(isofs_inode_map_node_cachep, node);
+}
+
+static inline struct isofs_inode_map_node *
+isofs_inode_map_node_alloc_unlocked(void)
+{
+ struct isofs_inode_map_node *rv =
+ kmem_cache_alloc(isofs_inode_map_node_cachep, SLAB_KERNEL);
+ memset(rv, 0, sizeof(*rv));
+ return rv;
+}
+
+void isofs_inode_map_init(struct isofs_inode_map *map)
+{
+ rwlock_init(&map->lock);
+ map->next_inode_number = 1;
+ map->index_by_phy = RB_ROOT;
+ map->index_by_ino = RB_ROOT;
+}
+
+/* This function returns 0 if the inode_number overflows.
+ *
+ * Because the root inode is always the first inode loaded, it will
+ * always be assigned 1 as its inode number.
+ *
+ * You should already hold the rwlock for writing when calling this
+ * function. */
+static inline unsigned long
+isofs_next_inode_number_unlocked(struct isofs_inode_map *map)
+{
+ unsigned long rv = 0;
+ if (map->next_inode_number) {
+ rv = map->next_inode_number++;
+ }
+ return rv;
+}
+
+/* Allow internal code to put back an inode number if an error occurs
+ * after taking a number. Do not try to put back an inode if
+ * isofs_next_inode_number_unlocked() returned 0. You must hold the
+ * rwlock for writing continuously from the time you call
+ * isofs_next_inode_number() until the time you call this function. */
+static inline void
+isofs_prev_inode_number_unlocked(struct isofs_inode_map *map)
+{
+ --map->next_inode_number;
+}
+
+/* Returns the map node that stores the same inode number as
+ * "inode_number". If no node is found, NULL is returned. This code
+ * is unlocked to allow for code reuse by routines that are holding
+ * the rwlock for writing (for example). */
+static inline struct isofs_inode_map_node *
+isofs_inode_map_find_by_ino_unlocked(struct isofs_inode_map *map,
+ unsigned long inode_number)
+{
+ struct rb_node *n = NULL;
+ struct isofs_inode_map_node *tmp = NULL;
+ struct isofs_inode_map_node *rv = NULL;
+
+ n = map->index_by_ino.rb_node;
+
+ while (n) {
+ tmp = rb_entry(n, struct isofs_inode_map_node, node_for_ino);
+ if (inode_number < tmp->inode_number) {
+ n = n->rb_left;
+ } else if (inode_number > tmp->inode_number) {
+ n = n->rb_right;
+ } else {
+ rv = tmp;
+ break;
+ }
+ }
+
+ return rv;
+}
+
+/* Returns the map node that stores the same inode number as
+ * "inode_number". If no node is found, NULL is returned. */
+struct isofs_inode_map_node *
+isofs_inode_map_find_by_ino(struct isofs_inode_map *map,
+ unsigned long inode_number)
+{
+ struct isofs_inode_map_node *rv;
+ read_lock(&map->lock);
+ rv = isofs_inode_map_find_by_ino_unlocked(map, inode_number);
+ read_unlock(&map->lock);
+ return rv;
+}
+
+/* Returns the map node that stores the same block and block offset as
+ * "block" and "block_offset". If no node is found, NULL is returned.
+ * This code is unlocked to allow for code reuse by routines that are
+ * holding the rwlock for writing (for example). */
+static inline struct isofs_inode_map_node *
+isofs_inode_map_find_by_phy_unlocked(struct isofs_inode_map *map,
+ sector_t block,
+ unsigned long block_offset)
+{
+ struct rb_node *n = NULL;
+ struct isofs_inode_map_node *tmp = NULL;
+ struct isofs_inode_map_node *rv = NULL;
+
+ n = map->index_by_phy.rb_node;
+
+ while (n) {
+ tmp = rb_entry(n, struct isofs_inode_map_node, node_for_phy);
+ if (block < tmp->block) {
+ n = n->rb_left;
+ } else if (block > tmp->block) {
+ n = n->rb_right;
+ } else if (block_offset < tmp->block_offset) {
+ n= n->rb_left;
+ } else if (block_offset > tmp->block_offset) {
+ n = n->rb_right;
+ } else {
+ rv = tmp;
+ break;
+ }
+ }
+
+ return rv;
+}
+
+/* Returns the map node that stores the same block and block offset as
+ * "block" and "block_offset". If no node is found, NULL is
+ * returned. */
+struct isofs_inode_map_node *
+isofs_inode_map_find_by_phy(struct isofs_inode_map *map,
+ sector_t block,
+ unsigned long block_offset)
+{
+ struct isofs_inode_map_node *rv;
+ read_lock(&map->lock);
+ rv = isofs_inode_map_find_by_phy_unlocked(map, block, block_offset);
+ read_unlock(&map->lock);
+ return rv;
+}
+
+/* This function does not allocate any memory. It simply inserts
+ * "node" into the ino tree. It is really just an extension of
+ * isofs_inode_map_index_by_phy_unlocked(). You must hold the rwlock
+ * for writing when calling this function. */
+static inline void
+isofs_inode_map_index_by_ino_unlocked(struct isofs_inode_map *map,
+ struct isofs_inode_map_node *node)
+{
+ struct rb_node **child = NULL;
+ struct rb_node *parent = NULL;
+ struct isofs_inode_map_node *tmp = NULL;
+ unsigned long inode_number = node->inode_number;
+
+ child = &map->index_by_ino.rb_node;
+
+ /* Find the node's sorted location in the rbtree. */
+ while (*child) {
+ parent = *child;
+ tmp = rb_entry(parent,
+ struct isofs_inode_map_node,
+ node_for_ino);
+ if (inode_number < tmp->inode_number) {
+ child = &(*child)->rb_left;
+ } else if (inode_number > tmp->inode_number) {
+ child = &(*child)->rb_right;
+ } else {
+ break;
+ }
+ }
+
+ if (*child == NULL) {
+ /* Insert */
+ rb_link_node(&node->node_for_ino, parent, child);
+ /* Balance */
+ rb_insert_color(&node->node_for_ino, &map->index_by_ino);
+ } else {
+ printk(KERN_ERR "isofs: Internal error: "
+ "node already indexed!\n");
+ }
+}
+
+/* Returns the inode number assigned for "block" and "block_offset."
+ * If no inode number is currently assigned, it automatically assigns
+ * the next available number in sequence and returns that as the inode
+ * number. If an error occurs, 0 is returned. You must hold the
+ * rwlock for writing when calling this function. */
+static inline unsigned long
+isofs_inode_map_index_by_phy_unlocked(struct isofs_inode_map *map,
+ sector_t block,
+ unsigned long block_offset)
+{
+ unsigned long rv = 0;
+ struct rb_node **child = NULL;
+ struct rb_node *parent = NULL;
+ struct isofs_inode_map_node *tmp = NULL;
+ struct isofs_inode_map_node *node = NULL;
+
+ child = &map->index_by_phy.rb_node;
+
+ /* Find the node's sorted location in the rbtree. */
+ while (*child) {
+ parent = *child;
+ tmp = rb_entry(parent,
+ struct isofs_inode_map_node,
+ node_for_phy);
+ if (block < tmp->block) {
+ child = &(*child)->rb_left;
+ } else if (block > tmp->block) {
+ child = &(*child)->rb_right;
+ } else if (block_offset < tmp->block_offset) {
+ child = &(*child)->rb_left;
+ } else if (block_offset > tmp->block_offset) {
+ child = &(*child)->rb_right;
+ } else {
+ node = tmp;
+ break;
+ }
+ }
+
+ if (node == NULL) {
+ /* Grab the next inode number. */
+ unsigned long ino = isofs_next_inode_number_unlocked(map);
+ if (ino == 0) {
+ printk(KERN_ERR "isofs: Inode number overflowed.\n");
+ goto out;
+ }
+ /* Create new node. */
+ node = isofs_inode_map_node_alloc_unlocked();
+ if (node == NULL) {
+ printk(KERN_ERR "isofs: Unable to allocate "
+ "map node.\n");
+ isofs_prev_inode_number_unlocked(map);
+ goto out;
+ }
+ node->inode_number = ino;
+ node->block = block;
+ node->block_offset = block_offset;
+ /* Insert */
+ rb_link_node(&node->node_for_phy, parent, child);
+ /* Balance */
+ rb_insert_color(&node->node_for_phy, &map->index_by_phy);
+ /* Atomically update the other tree while the rwlock
+ * is still held. */
+ isofs_inode_map_index_by_ino_unlocked(map, node);
+ }
+
+ rv = node->inode_number;
+
+ out:
+ return rv;
+}
+
+/* Returns the inode number assigned for "block" and "block_offset."
+ * If an error occurs, 0 is returned. If no inode number is currently
+ * assigned, it automatically assigns the next available number in
+ * sequence and returns that as the inode number. */
+unsigned long
+isofs_inode_map_index_by_phy(struct isofs_inode_map *map,
+ sector_t block,
+ unsigned long block_offset)
+{
+ unsigned long rv = 0;
+ write_lock(&map->lock);
+ rv = isofs_inode_map_index_by_phy_unlocked(map, block, block_offset);
+ write_unlock(&map->lock);
+ return rv;
+}
+
+/* The code in isofs_fill_super() needs to be able to change the
+ * location of the root inode. */
+void isofs_inode_map_set_root_inode(struct isofs_inode_map *map,
+ sector_t block,
+ unsigned long block_offset)
+{
+ struct isofs_inode_map_node *map_node;
+
+ write_lock(&map->lock);
+
+ map_node = isofs_inode_map_find_by_ino_unlocked(map, 1);
+ if (map_node) {
+ map_node->block = block;
+ map_node->block_offset = block_offset;
+ } else {
+ if (!isofs_inode_map_index_by_phy_unlocked(map,
+ block,
+ block_offset))
+ {
+ printk(KERN_ERR "isofs: Unable to map root inode!\n");
+ }
+ }
+
+ write_unlock(&map->lock);
+}
+
+/* This function is called when the super block is killed. It returns
+ * each node used in the mapping to the inode map cache. */
+void isofs_inode_map_destroy(struct isofs_inode_map *map)
+{
+ struct rb_node *n = NULL;
+ struct rb_node *parent = NULL;
+ struct isofs_inode_map_node *tmp = NULL;
+
+ write_lock(&map->lock);
+
+ n = map->index_by_ino.rb_node;
+
+ if (n == NULL) {
+ BUG_ON(map->index_by_phy.rb_node != NULL);
+ goto out_unlock;
+ }
+
+ for( ;; ) {
+
+ /* Traverse the tree until you find a leaf node. */
+ if (n->rb_left != NULL) {
+ n = n->rb_left;
+ continue;
+ }
+ if (n->rb_right != NULL) {
+ n = n->rb_right;
+ continue;
+ }
+
+ /* Dereference n while the pointer is still valid. */
+ tmp = rb_entry(n, struct isofs_inode_map_node, node_for_ino);
+ parent = n->rb_parent;
+
+ /* Free the leaf node from the rbtree. */
+ isofs_inode_map_node_destroy_unlocked(tmp);
+
+ /* Exit Here */
+ if (parent == NULL) {
+ break;
+ }
+
+ /* Update child of parent. */
+ if (parent->rb_left == n) {
+ parent->rb_left = NULL;
+ } else {
+ parent->rb_right = NULL;
+ }
+
+ /* Walk up the rbtree as a substitute for recursion. */
+ n = parent;
+ }
+
+ /* Both trees shared the nodes. */
+ map->index_by_ino = RB_ROOT;
+ map->index_by_phy = RB_ROOT;
+
+ out_unlock:
+
+ write_unlock(&map->lock);
+}
diff -uprN -X dontdiff linux-2.6.6/fs/isofs/Makefile linux-2.6.6-isofs/fs/isofs/Makefile
--- linux-2.6.6/fs/isofs/Makefile 2004-04-03 21:38:26.000000000 -0600
+++ linux-2.6.6-isofs/fs/isofs/Makefile 2004-05-19 08:24:27.715677432 -0500
@@ -4,7 +4,7 @@

obj-$(CONFIG_ISO9660_FS) += isofs.o

-isofs-objs-y := namei.o inode.o dir.o util.o rock.o
+isofs-objs-y := namei.o inode.o inode_map.o dir.o util.o rock.o
isofs-objs-$(CONFIG_JOLIET) += joliet.o
isofs-objs-$(CONFIG_ZISOFS) += compress.o
isofs-objs := $(isofs-objs-y)
diff -uprN -X dontdiff linux-2.6.6/fs/isofs/namei.c linux-2.6.6-isofs/fs/isofs/namei.c
--- linux-2.6.6/fs/isofs/namei.c 2004-04-03 21:37:38.000000000 -0600
+++ linux-2.6.6-isofs/fs/isofs/namei.c 2004-05-19 08:24:27.716677280 -0500
@@ -65,6 +65,8 @@ isofs_find_entry(struct inode *dir, stru
unsigned long bufsize = ISOFS_BUFFER_SIZE(dir);
unsigned char bufbits = ISOFS_BUFFER_BITS(dir);
unsigned int block, f_pos, offset;
+ sector_t abs_block;
+ unsigned long abs_block_offset;
struct buffer_head * bh = NULL;
struct isofs_sb_info *sbi = ISOFS_SB(dir->i_sb);

@@ -87,7 +89,8 @@ isofs_find_entry(struct inode *dir, stru
}

de = (struct iso_directory_record *) (bh->b_data + offset);
- inode_number = (bh->b_blocknr << bufbits) + offset;
+ abs_block = bh->b_blocknr;
+ abs_block_offset = offset;

de_len = *(unsigned char *) de;
if (!de_len) {
@@ -151,6 +154,10 @@ isofs_find_entry(struct inode *dir, stru
}
if (match) {
if (bh) brelse(bh);
+ inode_number =
+ isofs_inode_map_index_by_phy(&sbi->s_inode_map,
+ abs_block,
+ abs_block_offset);
return inode_number;
}
}
diff -uprN -X dontdiff linux-2.6.6/fs/isofs/rock.c linux-2.6.6-isofs/fs/isofs/rock.c
--- linux-2.6.6/fs/isofs/rock.c 2004-05-19 07:55:33.752279936 -0500
+++ linux-2.6.6-isofs/fs/isofs/rock.c 2004-05-20 06:50:05.763064376 -0500
@@ -164,6 +164,8 @@ int parse_rock_ridge_inode_internal(stru
int len;
unsigned char * chr;
int symlink_len = 0;
+ unsigned long inode_number = 0;
+ struct isofs_inode_map *map = &(ISOFS_SB(inode->i_sb)->s_inode_map);
CONTINUE_DECLS;

if (!ISOFS_SB(inode->i_sb)->s_rock) return 0;
@@ -306,9 +308,16 @@ int parse_rock_ridge_inode_internal(stru
goto out;
case SIG('C','L'):
ISOFS_I(inode)->i_first_extent = isonum_733(rr->u.CL.location);
- reloc = iget(inode->i_sb,
- (ISOFS_I(inode)->i_first_extent <<
- ISOFS_SB(inode->i_sb)->s_log_zone_size));
+ inode_number =
+ isofs_inode_map_index_by_phy(map,
+ ISOFS_I(inode)->i_first_extent,
+ 0);
+ if (inode_number == 0) {
+ printk(KERN_ERR "Inode number overflowed while "
+ "parsing rock ridge inode.\n");
+ goto out;
+ }
+ reloc = iget(inode->i_sb, inode_number);
if (!reloc)
goto out;
inode->i_mode = reloc->i_mode;
@@ -447,37 +456,43 @@ int parse_rock_ridge_inode(struct iso_di
static int rock_ridge_symlink_readpage(struct file *file, struct page *page)
{
struct inode *inode = page->mapping->host;
+ struct isofs_sb_info* sbi = ISOFS_SB(inode->i_sb);
char *link = kmap(page);
unsigned long bufsize = ISOFS_BUFFER_SIZE(inode);
- unsigned char bufbits = ISOFS_BUFFER_BITS(inode);
struct buffer_head *bh;
char *rpnt = link;
unsigned char *pnt;
struct iso_directory_record *raw_inode;
CONTINUE_DECLS;
- int block;
+ struct isofs_inode_map *map;
+ struct isofs_inode_map_node *map_node;
int sig;
int len;
unsigned char *chr;
struct rock_ridge *rr;

- if (!ISOFS_SB(inode->i_sb)->s_rock)
+ if (!sbi->s_rock)
panic ("Cannot have symlink with high sierra variant of iso filesystem\n");

- block = inode->i_ino >> bufbits;
+ map = &sbi->s_inode_map;
+ map_node = isofs_inode_map_find_by_ino(map, inode->i_ino);
+ if (map_node == NULL) {
+ goto out_eio;
+ }
+
lock_kernel();
- bh = sb_bread(inode->i_sb, block);
+ bh = sb_bread(inode->i_sb, map_node->block);
if (!bh)
goto out_noread;

- pnt = (unsigned char *) bh->b_data + (inode->i_ino & (bufsize - 1));
+ pnt = (unsigned char *) bh->b_data + map_node->block_offset;

raw_inode = (struct iso_directory_record *) pnt;

/*
* If we go past the end of the buffer, there is some sort of error.
*/
- if ((inode->i_ino & (bufsize - 1)) + *pnt > bufsize)
+ if (map_node->block_offset + *pnt > bufsize)
goto out_bad_span;

/* Now test for possible Rock Ridge extensions which will override
@@ -543,6 +558,7 @@ static int rock_ridge_symlink_readpage(s
SetPageError(page);
kunmap(page);
unlock_page(page);
+ out_eio:
return -EIO;
}

diff -uprN -X dontdiff linux-2.6.6/include/linux/iso_fs.h linux-2.6.6-isofs/include/linux/iso_fs.h
--- linux-2.6.6/include/linux/iso_fs.h 2004-04-03 21:37:24.000000000 -0600
+++ linux-2.6.6-isofs/include/linux/iso_fs.h 2004-05-19 17:00:04.749359160 -0500
@@ -2,6 +2,7 @@
#ifndef _ISOFS_FS_H
#define _ISOFS_FS_H

+#include <linux/rbtree.h>
#include <linux/types.h>
/*
* The isofs filesystem constants/structures
@@ -155,6 +156,18 @@ struct iso_directory_record {
char name [0];
} __attribute__((packed));

+
+/* Inode Map Node Cache -- Explicitly map 32-bit inode numbers to the
+ * arbitrary block and offset of an ISO 9660 Directory Record. */
+struct isofs_inode_map_node {
+ unsigned long inode_number;
+ sector_t block;
+ unsigned long block_offset;
+ struct rb_node node_for_phy;
+ struct rb_node node_for_ino;
+};
+
+
#define ISOFS_BLOCK_BITS 11
#define ISOFS_BLOCK_SIZE 2048

@@ -231,6 +244,31 @@ extern struct dentry *isofs_lookup(struc
extern struct buffer_head *isofs_bread(struct inode *, sector_t);
extern int isofs_get_blocks(struct inode *, sector_t, struct buffer_head **, unsigned long);

+extern int init_inode_map_node_cache(void);
+extern void destroy_inode_map_node_cache(void);
+
+extern void isofs_inode_map_init(struct isofs_inode_map *map);
+
+extern void isofs_inode_map_set_root_inode(struct isofs_inode_map *map,
+ sector_t block,
+ unsigned long block_offset);
+
+extern struct isofs_inode_map_node *
+isofs_inode_map_find_by_ino(struct isofs_inode_map *map,
+ unsigned long inode_number);
+
+extern struct isofs_inode_map_node *
+isofs_inode_map_find_by_phy(struct isofs_inode_map *map,
+ sector_t block,
+ unsigned long block_offset);
+
+extern unsigned long
+isofs_inode_map_index_by_phy(struct isofs_inode_map *map,
+ sector_t block,
+ unsigned long block_offset);
+
+extern void isofs_inode_map_destroy(struct isofs_inode_map *map);
+
extern struct inode_operations isofs_dir_inode_operations;
extern struct file_operations isofs_dir_operations;
extern struct address_space_operations isofs_symlink_aops;
diff -uprN -X dontdiff linux-2.6.6/include/linux/iso_fs_sb.h linux-2.6.6-isofs/include/linux/iso_fs_sb.h
--- linux-2.6.6/include/linux/iso_fs_sb.h 2004-04-03 21:38:28.000000000 -0600
+++ linux-2.6.6-isofs/include/linux/iso_fs_sb.h 2004-05-20 07:04:51.456418520 -0500
@@ -1,6 +1,15 @@
#ifndef _ISOFS_FS_SB
#define _ISOFS_FS_SB

+
+struct isofs_inode_map {
+ rwlock_t lock;
+ unsigned long next_inode_number;
+ struct rb_root index_by_phy;
+ struct rb_root index_by_ino;
+};
+
+
/*
* iso9660 super-block data in memory
*/
@@ -29,6 +38,8 @@ struct isofs_sb_info {
gid_t s_gid;
uid_t s_uid;
struct nls_table *s_nls_iocharset; /* Native language support table */
+
+ struct isofs_inode_map s_inode_map;
};

#endif
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/