Re: VFS 64-bit clean

Jakub Jelinek (jj@sunsite.ms.mff.cuni.cz)
Wed, 25 Feb 1998 13:16:13 +0100 (MET)


> At 64-bit systems the VFS layer is 64-bit clean.
> Will the user-space API be 64-bit clean is another matter.
> (Here I mean especially 32-bit user API on UltraSPARC machines.)
>
> At 64-bit architectures EXT2 can now handle up to 4 GB -1
> bytes in the file. Extending file-size encoding in the
> i-nodes is one matter, other things are user controllable,
> and don't need kernel changes.

The following patch adds support for large files on EXT2 on 64bit machines.
Once the kernel is able to handle large files on 32bit machines, this can
be easily changed.
On sparc64, even 32bit programs can make use of large files: llseek works
as expected, and using xstat one can get the 64bit st_size.
By large file I mean a file up to 16.06GB for blocksize 1K, 256.5GB for
blocksize 2K and 4.004TB for blocksize 4K (ext2 so far does not allow larger
block sizes, although on machines with PAGE_SIZE 8K and higher it could easily
be extended).

This is what I have now on my ext2 filesystem (with 1K blocksize):
[root@manka stat-1.5]# ./stat /mnt/large_file
File: "/mnt/large_file"
Size: 8589935616 Allocated Blocks: 10 Filetype: Regular File
Mode: (0644/-rw-r--r--) Uid: ( 0/ root) Gid: ( 0/
root)
Device: 8,2 Inode: 12 Links: 1
Access: Wed Feb 25 12:25:39 1998(00000.00:05:58)
Modify: Wed Feb 25 12:25:39 1998(00000.00:05:58)
Change: Wed Feb 25 12:31:14 1998(00000.00:00:23)
Optimal Blocksize: 8192
[root@manka stat-1.5]# ./stat /mnt/large_file2
File: "/mnt/large_file2"
Size: 17179870208 Allocated Blocks: 10 Filetype: Regular File
Mode: (0644/-rw-r--r--) Uid: ( 0/ root) Gid: ( 0/
root)
Device: 8,2 Inode: 13 Links: 1
Access: Wed Feb 25 12:25:48 1998(00000.00:05:50)
Modify: Wed Feb 25 12:25:48 1998(00000.00:05:50)
Change: Wed Feb 25 12:31:14 1998(00000.00:00:24)
Optimal Blocksize: 8192
[root@manka stat-1.5]# ./stat /mnt/large_file3
File: "/mnt/large_file3"
Size: 17247252479 Allocated Blocks: 10 Filetype: Regular File
Mode: (0644/-rw-r--r--) Uid: ( 0/ root) Gid: ( 0/
root)
Device: 8,2 Inode: 14 Links: 1
Access: Wed Feb 25 12:25:58 1998(00000.00:05:41)
Modify: Wed Feb 25 12:25:58 1998(00000.00:05:41)
Change: Wed Feb 25 12:31:14 1998(00000.00:00:25)
Optimal Blocksize: 8192

(All of these files are sparse, but only because I don't have enough space
on my workstation.) Note that 17247252479 (EXT2_MAX_SIZE(10) in the patch
below) is the largest file possible with 1K blocksize. Trying to seek/write
beyond that will give you an error (as it would before when trying to
seek/write past 4G).

Can this simple patch be included in the next release, 2.1.89?
I have tried to minimize long long operations on 32bit machines, where they
are inefficient and do not make sense.

--- ./fs/ext2/file.c.jj Mon Feb 9 17:44:21 1998
+++ ./fs/ext2/file.c Wed Feb 25 12:15:19 1998
@@ -37,6 +37,17 @@ static long long ext2_file_lseek(struct
static ssize_t ext2_file_write (struct file *, const char *, size_t, loff_t *);
static int ext2_release_file (struct inode *, struct file *);

+#define EXT2_MAX_SIZE(bits) \
+ (((EXT2_NDIR_BLOCKS + (1LL << (bits - 2)) + \
+ (1LL << (bits - 2)) * (1LL << (bits - 2)) + \
+ (1LL << (bits - 2)) * (1LL << (bits - 2)) * (1LL << (bits - 2))) * \
+ (1LL << bits)) - 1)
+
+static long long ext2_max_sizes[] = {
+0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+EXT2_MAX_SIZE(10), EXT2_MAX_SIZE(11), EXT2_MAX_SIZE(12), EXT2_MAX_SIZE(13)
+};
+
/*
* We have mostly NULL's here: the current defaults are ok for
* the ext2 filesystem.
@@ -86,7 +97,6 @@ static long long ext2_file_lseek(
long long offset,
int origin)
{
- long long retval;
struct inode *inode = file->f_dentry->d_inode;

switch (origin) {
@@ -96,17 +106,20 @@ static long long ext2_file_lseek(
case 1:
offset += file->f_pos;
}
- retval = -EINVAL;
- /* make sure the offset fits in 32 bits */
- if (((unsigned long long) offset >> 32) == 0) {
- if (offset != file->f_pos) {
- file->f_pos = offset;
- file->f_reada = 0;
- file->f_version = ++event;
- }
- retval = offset;
+ if (((unsigned long long) offset >> 32) != 0) {
+ if (sizeof(inode->i_size) > 4) {
+ if (offset > ext2_max_sizes
+ [EXT2_BLOCK_SIZE_BITS(inode->i_sb)])
+ return -EINVAL;
+ } else
+ return -EINVAL;
+ }
+ if (offset != file->f_pos) {
+ file->f_pos = offset;
+ file->f_reada = 0;
+ file->f_version = ++event;
}
- return retval;
+ return offset;
}

static inline void remove_suid(struct inode *inode)
@@ -128,7 +141,7 @@ static ssize_t ext2_file_write (struct f
size_t count, loff_t *ppos)
{
struct inode * inode = filp->f_dentry->d_inode;
- __u32 pos;
+ off_t pos;
long block;
int offset;
int written, c;
@@ -165,13 +178,26 @@ static ssize_t ext2_file_write (struct f
pos = *ppos;
if (pos != *ppos)
return -EINVAL;
+ if (sizeof(inode->i_size) > 4 && pos >
+ ext2_max_sizes[EXT2_BLOCK_SIZE_BITS(sb)])
+ return -EINVAL;
}

/* Check for overflow.. */
- if (pos > (__u32) (pos + count)) {
- count = ~pos; /* == 0xFFFFFFFF - pos */
- if (!count)
- return -EFBIG;
+ if (sizeof(inode->i_size) > 4) {
+ off_t max = ext2_max_sizes[EXT2_BLOCK_SIZE_BITS(sb)];
+
+ if (pos + count > max) {
+ count = max - pos;
+ if (!count)
+ return -EFBIG;
+ }
+ } else {
+ if (pos > (__u32) (pos + count)) {
+ count = ~pos; /* == 0xFFFFFFFF - pos */
+ if (!count)
+ return -EFBIG;
+ }
}

/*
--- ./fs/ext2/inode.c.jj Thu Feb 12 18:10:42 1998
+++ ./fs/ext2/inode.c Wed Feb 25 09:30:31 1998
@@ -493,7 +493,18 @@ void ext2_read_inode (struct inode * ino
inode->u.ext2_i.i_frag_size = raw_inode->i_fsize;
inode->u.ext2_i.i_osync = 0;
inode->u.ext2_i.i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
- inode->u.ext2_i.i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl);
+ if (S_ISDIR(inode->i_mode))
+ inode->u.ext2_i.i_dir_acl = le32_to_cpu(raw_inode->un1.i_dir_acl);
+ else {
+ inode->u.ext2_i.i_dir_acl = 0;
+ if (sizeof(inode->i_size) > 4)
+ inode->i_size |= ((off_t)le32_to_cpu(raw_inode->un1.i_size_high)) << 32;
+ else {
+ if (raw_inode->un1.i_size_high)
+ ext2_error(inode->i_sb, "ext2_read_inode",
+ "Inode has non-zero i_size_high on 32bit machine!");
+ }
+ }
inode->u.ext2_i.i_version = le32_to_cpu(raw_inode->i_version);
inode->u.ext2_i.i_block_group = block_group;
inode->u.ext2_i.i_next_alloc_block = 0;
@@ -603,7 +614,12 @@ static int ext2_update_inode(struct inod
raw_inode->i_frag = inode->u.ext2_i.i_frag_no;
raw_inode->i_fsize = inode->u.ext2_i.i_frag_size;
raw_inode->i_file_acl = cpu_to_le32(inode->u.ext2_i.i_file_acl);
- raw_inode->i_dir_acl = cpu_to_le32(inode->u.ext2_i.i_dir_acl);
+ if (S_ISDIR(inode->i_mode))
+ raw_inode->un1.i_dir_acl = cpu_to_le32(inode->u.ext2_i.i_dir_acl);
+ else if (sizeof(inode->i_size) > 4)
+ raw_inode->un1.i_size_high = cpu_to_le32(inode->i_size >> 32);
+ else
+ raw_inode->un1.i_size_high = 0;
raw_inode->i_version = cpu_to_le32(inode->u.ext2_i.i_version);
if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode))
raw_inode->i_block[0] = cpu_to_le32(kdev_t_to_1022nr(inode->i_rdev));
--- ./include/linux/ext2_fs.h.jj Mon Feb 9 17:50:54 1998
+++ ./include/linux/ext2_fs.h Wed Feb 25 09:17:03 1998
@@ -229,7 +229,10 @@ struct ext2_inode {
__u32 i_block[EXT2_N_BLOCKS];/* Pointers to blocks */
__u32 i_version; /* File version (for NFS) */
__u32 i_file_acl; /* File ACL */
- __u32 i_dir_acl; /* Directory ACL */
+ union {
+ __u32 i_dir_acl; /* Directory ACL */
+ __u32 i_size_high; /* High 32bits of i_size for file */
+ } un1;
__u32 i_faddr; /* Fragment address */
union {
struct {

Cheers,
Jakub
___________________________________________________________________
Jakub Jelinek | jj@sunsite.mff.cuni.cz | http://sunsite.mff.cuni.cz
Administrator of SunSITE Czech Republic, MFF, Charles University
___________________________________________________________________
Ultralinux - first 64bit OS to take full power of the UltraSparc
Linux version 2.1.88 on a sparc64 machine (498.80 BogoMips).
___________________________________________________________________

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu