Strange oops in iret_exc with my own module

From: Grzegorz Kulewski
Date: Thu Feb 08 2007 - 21:45:49 EST


Hi,

I written really simple and stupid module (the design wasn't mine). One tester of this module provided me the following oops it caused quite often (on many automated tests):

[17179765.492000] BUG: unable to handle kernel NULL pointer dereference at virtual address 00000000
[17179765.492000] printing eip:
[17179765.492000] c02c4845
[17179765.492000] *pde = 00000000
[17179765.492000] Oops: 0002 [#1]
[17179765.492000] Modules linked in: altpinfo rfcomm l2cap bluetooth ppdev cpufreq_userspace cpufreq_stats freq_table
cpufreq_powersave cpufreq_ondemand cpufreq_conservative video button battery container ac af_packet dm_mod md_mod ipv6
lp snd_seq_dummy snd_seq_oss snd_seq_midi snd_seq_midi_event snd_seq snd_via82xx gameport snd_ac97_codec snd_ac97_bus
via686a snd_pcm_oss snd_mixer_oss tsdev i2c_isa i2c_viapro usbhid snd_pcm snd_timer snd_page_alloc snd_mpu401_uart
snd_rawmidi snd_seq_device i2c_core snd soundcore psmouse 8139too mii pcspkr floppy serio_raw parport_pc rtc via_agp
agpgart parport shpchp pci_hotplug evdev ext3 jbd mbcache ide_generic uhci_hcd usbcore ide_cd cdrom ide_disk via82cxxx
generic thermal processor fan vga16fb cfbcopyarea vgastate cfbimgblt cfbfillrect
[17179765.492000] CPU: 0
[17179765.492000] EIP: 0060:[<c02c4845>] Not tainted VLI
[17179765.492000] EFLAGS: 00210246 (2.6.17.13nopreempt #3)
[17179765.492000] EIP is at iret_exc+0x4fd/0x75d
[17179765.492000] eax: 00000000 ebx: 00000000 ecx: 2b646970 edx: 336e3094
[17179765.492000] esi: 0809c724 edi: 00000000 ebp: 00000003 esp: c32b3f34
[17179765.492000] ds: 007b es: 007b ss: 0068
[17179765.492000] Process so6-3-test (pid: 7227, threadinfo=c32b2000 task=c620e550)
[17179765.492000] Stack: 00000000 2b646970 00000000 2b646970 d0833913 00000000 0809c724 2b646970
[17179765.492000] 0809c720 c47c8d40 40040c01 0809c720 c016a5f2 cadb0624 c47c8d40 40040c01
[17179765.492000] 0809c720 c47c8d40 00000000 c016a654 c015845a cffefa80 c47c8d40 c016a90d
[17179765.492000] Call Trace:
[17179765.492000] <d0833913> altpinfo_ioctl+0x103/0x170 [altpinfo] <c016a5f2> do_ioctl+0x52/0x60
[17179765.492000] <c016a654> vfs_ioctl+0x54/0x2b0 <c015845a> do_sys_open+0x9a/0xd0
[17179765.492000] <c016a90d> sys_ioctl+0x5d/0x90 <c0102d27> syscall_call+0x7/0xb
[17179765.492000] Code: 5c 24 28 c7 03 f2 ff ff ff e9 12 be f0 ff ba f2 ff ff ff e9 6d c0 f0 ff 8d 0c 8a e9 8f c0 f0 ff
01 c1 eb 03 8d 0c 88 51 50 31 c0 <f3> aa 58 59 e9 ca c0 f0 ff 8d 0c 88 51 50 31 c0 f3 aa 58 59 e9
[17179765.492000] EIP: [<c02c4845>] iret_exc+0x4fd/0x75d SS:ESP 0068:c32b3f34
[17179765.492000] <6>altpinfo: unloaded


The function altpinfo_ioctl looks like this:

static int altpinfo_ioctl(struct inode *inode, struct file *f, unsigned int cmd, unsigned long arg)
{
int ret = 0;

switch (cmd) {

case API_GET_FMT:
ret = put_fmt((char __user *)arg);
break;

case API_SET_FMT:
if (!capable(CAP_SYS_ADMIN)) {
return -EPERM;
}

ret = get_fmt((char __user *)arg);
break;

case API_GET_CURLEN:
ret = put_curlen(f, (int __user *)arg);
break;

default:
ret = -ENOTTY;
break;

}

return ret;
}


Helper functions look like this:

static int put_curlen(struct file *f, int __user *arg)
{
int left;
struct private_data *p = (struct private_data *) f->private_data;

if (down_interruptible(&p->sem) != 0) {
return -ERESTARTSYS;
}

left = p->buffer_size - f->f_pos;

up(&p->sem);

return put_user(left, arg);
}

static int put_fmt(char __user *arg)
{
char *fmt;
int ret;

ret = get_format_string(&fmt);

if (ret != 0) {
return ret;
}

if (copy_to_user(arg, fmt, strlen(fmt)) != 0) {
return -EIO;
}

kfree(fmt);

return 0;
}

/* Better make arg buffer at least 4 bytes alligned in userspace! */
static int get_fmt(char __user *arg)
{
int ret;
int len;
char *buf;

ret = get_user(len, (int __user *)arg);

if (ret != 0) {
return ret;
}

buf = kcalloc(len, sizeof(char), GFP_KERNEL);

if (copy_from_user(buf, arg + 4, len) != 0) {
kfree(buf);
return -EIO;
}

buf[len] = '\0';

ret = parse_format(buf);

if (ret != 0) {
kfree(buf);
return ret;
}

return 0;
}


I wonder what does this oops mean and what that function iret_exc really does and why? It looks like some automagical exception handler but I am not sure about it.

I can't reproduce this oops for now on my machine. Could anybody tell me why that code is wrong and why is this oops in iret_exc? (At the same time I am waiting for further info from the tester but I am not sure when and how much I will get.)

Any help on tracing this down would be appreciated.


Thanks,

Grzegorz Kulewski

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/