RFC: kdrvscan

From: Nathaniel McCallum
Date: Thu Feb 10 2011 - 14:17:37 EST


Please CC me on any responses as I'm not subscribed to lkml.

A while back I posted (https://lkml.org/lkml/2009/10/1/334) a patch to
tag *_device_id tables into sections so that one could easily extract
an inventory from a Linux kernel binary to determine what hardware it
supports. Since that time, I have pursued another solution, namely
kdrvscan. kdrvscan is a small command line program which fingerprints
the symbols listed in System.map. This program has the advantage of
not requiring any change to the kernel binary. However, it also
commits the cardinal sin of importing kernel headers into user space.
The source code is attached along with a simple Makefile that works on
my system. Before I go much further in cleaning it up I'm wondering,
does this approach make sense? Could a utility like this be included
upstream?

Nathaniel
/* Ok, yes, these includes are hideously ugly.
* And yes, I know it's evil to include kernel headers in userspace, but the
* whole point of this program is to fingerprint kernel-internal structs.
* I'm VERY open to better ideas.
*
* However, there should be very little conflict between kernel and userspace
* type sizes in this case. When in doubt, we use the kernel sizes. Since
* we don't use any of these data types anyway, it shouldn't really matter.
*
* For background info, see:
* http://kerneltrap.org/mailarchive/linux-kernel/2009/10/1/4492876
* or
* https://lkml.org/lkml/2009/10/1/334
*/
#define __KERNEL__
#define KBUILD_MODNAME "kdrvscan"
#include <generated/autoconf.h>
#include <linux/pci.h>
#include <linux/usb.h>
#include <acpi/acpi_bus.h>

// For printf(), fopen(), fclose(), fseek(), etc
#undef __always_inline
#define __off_t_defined // Use kernel off_t
#define __ssize_t_defined // Use kernel ssize_t
#include <stdio.h>

// For decompressing the kernel
#define __gid_t_defined // Use kernel gid_t
#define __uid_t_defined // Use kernel uid_t
#define __pid_t_defined // Use kernel pid_t
#define _SYS_TYPES_H
#include <zlib.h>

// Since I have trouble importing stdlib.h...
extern void *realloc (void *__ptr, size_t __size);
extern void free (void *__ptr);
extern char *strdup (const char *__s);

// Character classification table, copied from kernel lib/ctype.c.
// It has 256 entries, one per character code (the code range of each row is
// noted in the trailing comment). Every entry is either 0 or an OR of the
// _U/_L/_D/_C/_P/_S/_X/_SP flag macros. Those macros are not defined in this
// file; presumably they come from the kernel's ctype header pulled in through
// the kernel includes -- TODO confirm.
// NOTE(review): the table is presumably defined here because the kernel's
// isprint()/isalnum() macros used by this program index into _ctype and there
// is no kernel object to link it from in userspace -- confirm.
const unsigned char _ctype[] = {
_C,_C,_C,_C,_C,_C,_C,_C, /* 0-7 */
_C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C, /* 8-15 */
_C,_C,_C,_C,_C,_C,_C,_C, /* 16-23 */
_C,_C,_C,_C,_C,_C,_C,_C, /* 24-31 */
_S|_SP,_P,_P,_P,_P,_P,_P,_P, /* 32-39 */
_P,_P,_P,_P,_P,_P,_P,_P, /* 40-47 */
_D,_D,_D,_D,_D,_D,_D,_D, /* 48-55 */
_D,_D,_P,_P,_P,_P,_P,_P, /* 56-63 */
_P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U, /* 64-71 */
_U,_U,_U,_U,_U,_U,_U,_U, /* 72-79 */
_U,_U,_U,_U,_U,_U,_U,_U, /* 80-87 */
_U,_U,_U,_P,_P,_P,_P,_P, /* 88-95 */
_P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L, /* 96-103 */
_L,_L,_L,_L,_L,_L,_L,_L, /* 104-111 */
_L,_L,_L,_L,_L,_L,_L,_L, /* 112-119 */
_L,_L,_L,_P,_P,_P,_P,_C, /* 120-127 */
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 128-143 */
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 144-159 */
_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 160-175 */
_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 176-191 */
_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U, /* 192-207 */
_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L, /* 208-223 */
_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L, /* 224-239 */
_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L}; /* 240-255 */

// TODO: add a build flag to find 32/64bit
// Selects the ELF class constant and the matching header, section-header and
// address types used by the rest of the program.
// BIT64 is not defined in the visible source; an undefined (or zero) BIT64
// evaluates to 0 in #if, so the 32-bit variants are chosen unless the build
// supplies a non-zero BIT64 (e.g. -DBIT64=1).
#if BIT64
#define ELFCLASS ELFCLASS64
#define Elf_Ehdr Elf64_Ehdr
#define Elf_Shdr Elf64_Shdr
#define Elf_Addr Elf64_Addr
#else
#define ELFCLASS ELFCLASS32
#define Elf_Ehdr Elf32_Ehdr
#define Elf_Shdr Elf32_Shdr
#define Elf_Addr Elf32_Addr
#endif

// isnull(s): true when every byte of the object `s` is zero. `s` must be an
// lvalue, because the macro takes its address and its size.
#define isnull(s) memisnull(&(s), sizeof(s))

// Returns true when the `size` bytes starting at `mem` are all zero (this is
// trivially true for size == 0) and false as soon as a non-zero byte is seen.
static bool memisnull(void *mem, size_t size) {
    const char *cursor = mem;
    const char *end = cursor + size;

    while (cursor != end) {
        if (*cursor++ != 0)
            return false;
    }
    return true;
}

// Scans `file` forward from its current position for the two-byte gzip magic
// 0x1f 0x8b (written below in octal as '\037' '\213').
//
// file: stream to scan; it must be seekable for the final reposition to work.
// prev: the byte that immediately precedes the current position, or any value
//       other than '\037' (e.g. '\0') when there is no such byte.
//
// Returns true with the stream repositioned onto the first magic byte, so that
// a decompressor can start reading right there. Returns false when end of file
// or a read error is reached first, or when seeking back fails.
//
// This is a loop rather than a recursion: the magic may sit megabytes into the
// file, and one stack frame per scanned byte can exhaust the stack when the
// compiler does not turn the tail call into a jump.
static bool find_magic(FILE *file, char prev) {
    char c;

    while (fread(&c, 1, 1, file) == 1) {
        if (prev == '\037' && c == '\213') {
            // Step back over both magic bytes; fseek() reports failure with
            // any non-zero value, not only -1.
            return fseek(file, -2, SEEK_CUR) == 0;
        }
        prev = c;
    }
    return false;
}

// Number of bytes requested from gzread() per iteration, and the amount of
// spare room kept at the end of the buffer.
#define BLOCKSIZE 1024

// Reads the remainder of the gzip stream `file` into a heap buffer.
//
// file: open zlib stream to read from.
// buf:  buffer obtained from realloc() (or NULL) that already holds `size`
//       bytes of earlier data; ownership passes to this function.
// size: number of valid bytes already in `buf`.
//
// Returns the (possibly moved) buffer once gzread() reports end of stream; the
// caller releases it with free(). Returns NULL when realloc() fails or gzread()
// reports an error; the buffer has already been freed in that case.
// The final data length is not reported to the caller. The returned allocation
// always has BLOCKSIZE bytes of uninitialised room after the data.
//
// This is a loop rather than a recursion (one frame per block read), and the
// write position is computed through a char pointer because arithmetic on
// void * is a compiler extension.
static void *read_block(gzFile file, void *buf, size_t size) {
    for (;;) {
        // Make sure there is room for one more block after the current data.
        void *grown = realloc(buf, size + BLOCKSIZE);
        if (!grown) {
            free(buf);
            return NULL;
        }
        buf = grown;

        int bytes = gzread(file, (char *) buf + size, BLOCKSIZE);
        if (bytes < 0) {
            free(buf);
            return NULL;
        }
        if (bytes == 0)
            return buf;

        size += (size_t) bytes;
    }
}

// Loads the gzip-compressed kernel embedded in `filename` and returns the
// decompressed image.
//
// Steps: locate the gzip magic inside the file, hand the file descriptor
// (positioned at the magic) to zlib, decompress everything into memory and
// check that the result starts with an ELF header of the expected class and
// version.
//
// Returns a heap buffer that the caller releases with free(), or NULL when the
// file cannot be opened, no gzip magic is found, decompression fails or the
// result is not an ELF image of class ELFCLASS.
//
// NOTE(review): read_block() does not report how many bytes were decompressed,
// so the ELF check below may inspect uninitialised bytes when the decompressed
// data is shorter than the ELF identification.
static void *kernel_load(const char *filename) {
    // Open the file. It holds raw binary data, so request binary mode.
    FILE *file = fopen(filename, "rb");
    if (!file) return NULL;

    // Find the magic
    if (!find_magic(file, '\0')) { fclose(file); return NULL; }

    // stdio reads ahead, so the descriptor's offset may be past the magic even
    // though the stream position is on it. POSIX specifies that fflush() on a
    // seekable input stream moves the descriptor's offset to the stream
    // position; do that before the descriptor is handed to zlib.
    if (fflush(file) != 0) { fclose(file); return NULL; }

    // Open it as a gzip file. The duplicate shares the file offset with the
    // stream's descriptor and survives the fclose().
    int fd = dup(fileno(file));
    fclose(file);
    if (fd < 0) return NULL;
    gzFile kfile = gzdopen(fd, "r");
    if (!kfile) { close(fd); return NULL; }
    // From here on kfile owns fd; gzclose() closes it.

    // Read the kernel into memory
    void *kernel = read_block(kfile, NULL, 0);
    gzclose(kfile);
    if (!kernel) return NULL;

    // Ensure this is ELF
    if (strncmp(ELFMAG, kernel, strlen(ELFMAG)) ||
        ((char *) kernel)[EI_CLASS] != ELFCLASS ||
        ((char *) kernel)[EI_VERSION] != EV_CURRENT) {
        free(kernel);
        return NULL;
    }

    return kernel;
}

// Finds the section of the loaded ELF image `kernel` whose address range
// contains the virtual address `addr`.
//
// Returns a pointer to the first matching section header inside the image, or
// NULL when `kernel` is NULL, `addr` is 0 or no section contains the address.
//
// A section covers [sh_addr, sh_addr + sh_size): the first byte of a section
// belongs to it, the byte after its last one does not.
static const Elf_Shdr *elf_sect_from_addr(const void *kernel, Elf_Addr addr) {
    if (!kernel || addr == 0) return NULL;
    const Elf_Ehdr *ehdr = (const Elf_Ehdr *) kernel;
    const Elf_Shdr *shdrs = (const Elf_Shdr *) ((const char *) kernel + ehdr->e_shoff);

    for (int i = 0; i < ehdr->e_shnum; i++) {
        // Comparing the offset into the section against sh_size (instead of
        // addr against sh_addr + sh_size) cannot wrap around.
        if (addr >= shdrs[i].sh_addr && addr - shdrs[i].sh_addr < shdrs[i].sh_size)
            return &shdrs[i];
    }
    return NULL;
}

static const char *elf_sect_name(const void *kernel, const Elf_Shdr *shdr) {
if (!kernel || !shdr) return NULL;
const Elf_Ehdr *ehdr = ((const Elf_Ehdr *) kernel);
const Elf_Shdr *shdrs = kernel + ehdr->e_shoff;

return kernel + shdrs[ehdr->e_shstrndx].sh_offset + shdr->sh_name;
}

// elf_obj_from_addr(kernel, addr, type): translates the virtual address `addr`
// into a `type *` pointing into the loaded image (NULL when not found).
#define elf_obj_from_addr(kernel, addr, type) ((type *) elf_obj_from_addr_(kernel, addr))

// Translates the virtual address `addr` into a pointer inside the loaded ELF
// image `kernel`.
//
// The first section whose range [sh_addr, sh_addr + sh_size) contains `addr`
// is used; the result is that section's file offset plus the offset of `addr`
// within the section. Returns NULL when `kernel` is NULL, `addr` is 0 or no
// section contains the address.
static const void *elf_obj_from_addr_(const void *kernel, Elf_Addr addr) {
    if (!kernel || addr == 0) return NULL;
    const char *image = (const char *) kernel;
    const Elf_Ehdr *ehdr = (const Elf_Ehdr *) kernel;
    const Elf_Shdr *shdrs = (const Elf_Shdr *) (image + ehdr->e_shoff);

    for (int i = 0; i < ehdr->e_shnum; i++) {
        // The start address itself is part of the section. Comparing the
        // offset against sh_size cannot wrap around.
        if (addr >= shdrs[i].sh_addr && addr - shdrs[i].sh_addr < shdrs[i].sh_size)
            return image + shdrs[i].sh_offset + (addr - shdrs[i].sh_addr);
    }
    return NULL;
}

// Checks whether `str` points at something that looks like a real string.
//
// str:    candidate string, may be NULL.
// maxptr: one past the last byte that may be examined.
//
// Returns `str` when a NUL terminator is found before `maxptr`, every
// character before it is printable and at least one of them is alphanumeric.
// Returns NULL otherwise (including for NULL and for the empty string).
static const char *str_from_ptr(const char *str, const char *maxptr) {
    if (!str) return NULL;

    bool hasalnum = false;
    for (const char *tmp = str; ; tmp++) {
        // Check the bound before touching the byte, so nothing at or beyond
        // maxptr is ever read.
        if (tmp >= maxptr)
            return NULL;
        if (*tmp == '\0')
            break;

        // The ctype classifiers need an unsigned char value; a plain char may
        // be negative.
        unsigned char ch = (unsigned char) *tmp;
        if (!isprint(ch))
            return NULL;
        // Remember any alphanumeric character, not just whether the last
        // character happened to be one.
        if (isalnum(ch))
            hasalnum = true;
    }
    return hasalnum ? str : NULL;
}

// Resolves the virtual address `addr` to a string inside the loaded ELF image
// `kernel`.
//
// The address must fall inside a section; the candidate is then validated by
// str_from_ptr() with the end of that section's data as the limit. Returns the
// string, or NULL when `kernel` is NULL, `addr` is 0, no section contains the
// address or the candidate is rejected.
static const char *elf_str_from_addr(const void *kernel, Elf_Addr addr) {
    if (kernel == NULL || addr == 0)
        return NULL;

    const Elf_Shdr *section = elf_sect_from_addr(kernel, addr);
    if (section == NULL)
        return NULL;

    const char *candidate = elf_obj_from_addr(kernel, addr, const char);
    const char *section_end = (const char *) kernel + section->sh_offset + section->sh_size;

    return str_from_ptr(candidate, section_end);
}

// elf_array_from_addr(kernel, addr, type): resolves `addr` to a `type *` array
// inside the loaded image that is terminated by an all-zero element
// (NULL when no such array is found).
#define elf_array_from_addr(kernel, addr, type) ((type *) elf_array_from_addr_(kernel, addr, sizeof(type)))

// Resolves the virtual address `addr` to the start of an array of
// `objsize`-byte elements inside the loaded ELF image `kernel`.
//
// The array is accepted only when an all-zero element is found before the end
// of the section's data, so callers can iterate up to that terminator without
// leaving the section.
//
// Returns a pointer to the first element, or NULL when an argument is NULL/0,
// `addr` is not inside any section or no terminator is found.
static void *elf_array_from_addr_(const void *kernel, Elf_Addr addr, size_t objsize) {
    if (!kernel || addr == 0 || objsize == 0) return NULL;

    // The section header is dereferenced below, so a miss must stop here.
    const Elf_Shdr *shdr = elf_sect_from_addr(kernel, addr);
    if (!shdr) return NULL;

    // Get the start of the array
    char *item = elf_obj_from_addr(kernel, addr, char);
    if (!item) return NULL;

    // Make sure the end of the array is inside the section
    const char *end = (const char *) kernel + shdr->sh_offset + shdr->sh_size;
    for (size_t i = 0; item + (i + 1) * objsize <= end; i++) {
        if (memisnull(item + i * objsize, objsize))
            return item;
    }
    return NULL;
}

// Tries to interpret `driver` (a pointer into the loaded image `kernel`) as a
// struct pci_driver and, when it passes the fingerprint, prints one
// "alias pci:... <name>" line per entry of its pci_device_id table.
//
// Fingerprint: `name` and `id_table` are non-NULL, the `node` and `driver`
// members are all zero, `name` resolves to a plausible string and `id_table`
// resolves to a zero-terminated array inside .data or .rodata.
//
// Returns true when the aliases were printed, false when the struct does not
// look like a PCI driver (nothing is printed in that case).
static bool print_pci_drivers(void *kernel, struct pci_driver *driver) {
    if (!kernel || !driver) return false;

    // Fingerprint it
    if (driver->name == NULL || driver->id_table == NULL || !isnull(driver->node) || !isnull(driver->driver))
        return false;

    // Get the name of the driver
    const char *name = elf_str_from_addr(kernel, (Elf_Addr) driver->name);
    if (!name) return false;

    // Get the pci_device_id table
    const char *section = elf_sect_name(kernel, elf_sect_from_addr(kernel, (Elf_Addr) driver->id_table));
    if (!section || (strcmp(section, ".data") && strcmp(section, ".rodata"))) return false;
    struct pci_device_id *ids = elf_array_from_addr(kernel, (Elf_Addr) driver->id_table, struct pci_device_id);
    if (!ids) return false;

    // Ok, it looks like we have a PCI driver!
    for (size_t j = 0; !isnull(ids[j]); j++) {
        // Split class and class_mask into base class, subclass and interface
        // bytes; each class byte is masked with its mask byte.
        int bcm = (ids[j].class_mask >> 16) & 0xff;
        int bc = (ids[j].class >> 16) & 0xff & bcm;
        int scm = (ids[j].class_mask >> 8) & 0xff;
        int sc = (ids[j].class >> 8) & 0xff & scm;
        int im = (ids[j].class_mask >> 0) & 0xff;
        int i = (ids[j].class >> 0) & 0xff & im;

        // 0xffffffff in an ID field, or a zero mask byte, is printed as "*".
        printf("alias pci:");
        printf(ids[j].vendor == 0xffffffff ? "v*" : "v%04x", ids[j].vendor);
        printf(ids[j].device == 0xffffffff ? "d*" : "d%04x", ids[j].device);
        printf(ids[j].subvendor == 0xffffffff ? "sv*" : "sv%04x", ids[j].subvendor);
        printf(ids[j].subdevice == 0xffffffff ? "sd*" : "sd%04x", ids[j].subdevice);
        printf(bcm == 0x00 ? "bc*" : "bc%02x", bc);
        printf(scm == 0x00 ? "sc*" : "sc%02x", sc);
        printf(im == 0x00 ? "i*" : "i%02x", i);
        printf(" %s\n", name);
    }
    return true;
}

// Tries to interpret `driver` (a pointer into the loaded image `kernel`) as a
// struct usb_driver and, when it passes the fingerprint, prints one
// "alias usb:... <name>" line per entry of its usb_device_id table.
//
// Fingerprint: `name` and `id_table` are non-NULL, the `drvwrap` member is all
// zero, `name` resolves to a plausible string and `id_table` resolves to a
// zero-terminated array inside .data or .rodata.
//
// Returns true when the aliases were printed, false when the struct does not
// look like a USB driver (nothing is printed in that case).
static bool print_usb_drivers(void *kernel, struct usb_driver *driver) {
    if (!kernel || !driver) return false;

    // Fingerprint it
    if (driver->name == NULL || driver->id_table == NULL || !isnull(driver->drvwrap))
        return false;

    // Get the name of the driver
    const char *name = elf_str_from_addr(kernel, (Elf_Addr) driver->name);
    if (!name) return false;

    // Get the usb_device_id table
    const char *section = elf_sect_name(kernel, elf_sect_from_addr(kernel, (Elf_Addr) driver->id_table));
    if (!section || (strcmp(section, ".data") && strcmp(section, ".rodata"))) return false;
    struct usb_device_id *ids = elf_array_from_addr(kernel, (Elf_Addr) driver->id_table, struct usb_device_id);
    if (!ids) return false;

    // Ok, it looks like we have a USB driver!
    for (size_t j = 0; !isnull(ids[j]); j++) {
        // TODO: I know this isn't exactly the same modalias as the traditional usb modalias,
        // but my goal is to provide an exact replication of the struct and this version fits better
        // A field is printed as "*" unless its match flag is set.
        printf("alias usb:");
        printf(ids[j].match_flags & USB_DEVICE_ID_MATCH_VENDOR ? "v%04x" : "v*", ids[j].idVendor);
        printf(ids[j].match_flags & USB_DEVICE_ID_MATCH_PRODUCT ? "p%04x" : "p*", ids[j].idProduct);
        printf(ids[j].match_flags & USB_DEVICE_ID_MATCH_DEV_LO ? "dl%04x" : "dl*", ids[j].bcdDevice_lo);
        printf(ids[j].match_flags & USB_DEVICE_ID_MATCH_DEV_HI ? "dh%04x" : "dh*", ids[j].bcdDevice_hi);
        printf(ids[j].match_flags & USB_DEVICE_ID_MATCH_DEV_CLASS ? "dc%04x" : "dc*", ids[j].bDeviceClass);
        printf(ids[j].match_flags & USB_DEVICE_ID_MATCH_DEV_SUBCLASS ? "dsc%04x" : "dsc*", ids[j].bDeviceSubClass);
        printf(ids[j].match_flags & USB_DEVICE_ID_MATCH_DEV_PROTOCOL ? "dp%04x" : "dp*", ids[j].bDeviceProtocol);
        printf(ids[j].match_flags & USB_DEVICE_ID_MATCH_INT_CLASS ? "ic%04x" : "ic*", ids[j].bInterfaceClass);
        printf(ids[j].match_flags & USB_DEVICE_ID_MATCH_INT_SUBCLASS ? "isc%04x" : "isc*", ids[j].bInterfaceSubClass);
        printf(ids[j].match_flags & USB_DEVICE_ID_MATCH_INT_PROTOCOL ? "ip%04x" : "ip*", ids[j].bInterfaceProtocol);
        printf(" %s\n", name);
    }
    return true;
}

// Tries to interpret `driver` (a pointer into the loaded image `kernel`) as a
// struct acpi_driver and, when it passes the fingerprint, prints one
// "alias acpi:<id> <name>" line per usable entry of its acpi_device_id table.
//
// Fingerprint: `ids` is non-NULL, the `drv` member is all zero, the embedded
// `name` and `class` members hold plausible strings and `ids` resolves to a
// zero-terminated array inside .data or .rodata.
//
// Returns true when the struct looked like an ACPI driver, false otherwise
// (nothing is printed in that case).
static bool print_acpi_drivers(void *kernel, struct acpi_driver *driver) {
    // Same argument guard as the PCI and USB variants.
    if (!kernel || !driver) return false;

    // Fingerprint
    if (driver->ids == NULL || !isnull(driver->drv))
        return false;

    // Get the name of the driver. The member is read in place inside the
    // struct, bounded by the start of the next member used here.
    const char *name = str_from_ptr((const char *) &(driver->name), (const char *) &(driver->class));
    if (!name) return false;

    // Get the class of the driver (for only fingerprinting reasons)
    const char *class = str_from_ptr((const char *) &(driver->class), (const char *) &(driver->ids));
    if (!class) return false;

    // Get the acpi_device_id table
    const char *section = elf_sect_name(kernel, elf_sect_from_addr(kernel, (Elf_Addr) driver->ids));
    if (!section || (strcmp(section, ".data") && strcmp(section, ".rodata"))) return false;
    struct acpi_device_id *ids = elf_array_from_addr(kernel, (Elf_Addr) driver->ids, struct acpi_device_id);
    if (!ids) return false;

    // Ok, it looks like we have an ACPI driver!
    for (size_t j = 0; !isnull(ids[j]); j++) {
        const char *id = str_from_ptr((const char *) &(ids[j]), (const char *) &(ids[j].driver_data));
        // An entry whose id is not a plausible string is skipped: handing a
        // NULL pointer to "%s" is undefined behaviour.
        if (!id) continue;
        printf("alias acpi:%s %s\n", id, name);
    }
    return true;
}

// Writes the command-line synopsis for `progname` to stderr, then terminates
// the process with status 1 through _exit(). Never returns to the caller.
static void usage(char *progname) {
    fprintf(stderr,
            "%s <kernel_file> [<system_map_file>]\n",
            progname);

    _exit(1);
}

int main(int argc, char **argv) {
if (argc < 2 || argc > 3)
usage(argv[0]);

void *krnl = kernel_load(argv[1]);
if (!krnl) {
fprintf(stderr, "Unable to read vmlinuz file!\n");
usage(argv[0]);
}

char *mapfile = NULL;
if (argc == 3)
mapfile = strdup(argv[2]);
else {
char *tmp = strstr(argv[1], "vmlinuz");
if (tmp) {
mapfile = realloc(NULL, strlen(argv[1]) + strlen("System.map") - strlen("vmlinuz") + 1);
if (mapfile) {
strncpy(mapfile, argv[1], tmp - argv[1]);
strcat(mapfile, "System.map");
strcat(mapfile, tmp + strlen("vmlinuz"));
}
}
}
if (!mapfile) {
fprintf(stderr, "Unable to find System.map file!\n");
usage(argv[0]);
}

FILE *sysmap = fopen(mapfile, "r");
if (!sysmap) {
fprintf(stderr, "Unable to read System.map file ('%s')!\n", mapfile);
free(mapfile);
usage(argv[0]);
}
free(mapfile);

for (Elf_Addr addr=0 ; fscanf(sysmap, "%lx %*s %*s\n", (unsigned long int *) &addr) == 1 ; ) {
if (addr == 0) continue;

// The driver struct must be in the .data section
const char *section = elf_sect_name(krnl, elf_sect_from_addr(krnl, addr));
if (!section || strcmp(section, ".data")) continue;

// Fetch the driver struct
void *driver = elf_obj_from_addr(krnl, addr, void);
if (!driver) continue;

// Try to print the driver
if (!print_pci_drivers(krnl, driver))
if (!print_usb_drivers(krnl, driver))
print_acpi_drivers(krnl, driver);
}

fclose(sysmap);
free(krnl);

return 0;
}

Attachment: Makefile
Description: Binary data