Re: [RFC v2][PATCH 5/9] Memory management - restore state

From: Louis Rilling
Date: Thu Aug 21 2008 - 06:07:52 EST


On Wed, Aug 20, 2008 at 11:05:39PM -0400, Oren Laadan wrote:
>
> Restoring the memory address space begins with nuking the existing one
> of the current process, and then reading the VMA state and contents.
> Call do_mmap_pgoff() for each VMA and then read in the data.

[...]

> diff --git a/checkpoint/rstr_mem.c b/checkpoint/rstr_mem.c
> new file mode 100644
> index 0000000..df602a9
> --- /dev/null
> +++ b/checkpoint/rstr_mem.c

[...]

> +static int cr_read_vma(struct cr_ctx *ctx, struct mm_struct *mm)
> +{
> + struct cr_hdr_vma *hh = cr_hbuf_get(ctx, sizeof(*hh));
> + unsigned long vm_size, vm_flags, vm_prot, vm_pgoff;
> + unsigned long addr;
> + unsigned long flags;
> + struct file *file = NULL;
> + char *fname = NULL;
> + int ret;
> +
> + ret = cr_read_obj_type(ctx, hh, sizeof(*hh), CR_HDR_VMA);
> + if (ret < 0)
> + return ret;
> + else if (ret != 0)
> + return -EINVAL;
> +
> + cr_debug("vma %#lx-%#lx npages %d\n", (unsigned long) hh->vm_start,
> + (unsigned long) hh->vm_end, (int) hh->npages);
> +
> + if (hh->vm_end < hh->vm_start || hh->npages < 0)
> + return -EINVAL;
> +
> + vm_size = hh->vm_end - hh->vm_start;
> + vm_prot = cr_calc_map_prot_bits(hh->vm_flags);
> + vm_flags = cr_calc_map_flags_bits(hh->vm_flags);
> + vm_pgoff = hh->vm_pgoff;
> +
> + if (hh->fname) {
> + fname = ctx->tbuf;
> + ret = cr_read_str(ctx, fname, PAGE_SIZE);
> + if (ret < 0)
> + return ret;
> + }
> +
> + cr_debug("vma fname '%s' how %d\n", fname, hh->how);
> +
> + switch (hh->how) {
> +
> + case CR_VMA_ANON: /* anonymous private mapping */
> + if (hh->fname)
> + return -EINVAL;
> + /* vm_pgoff for anonymous mapping is the "global" page
> + offset (namely from addr 0x0), so we force a zero */
> + vm_pgoff = 0;
> + break;
> +
> + case CR_VMA_FILE: /* private mapping from a file */
> + if (!hh->fname)
> + return -EINVAL;
> + /* O_RDWR only needed if both (VM_WRITE|VM_SHARED) are set */
> + flags = hh->vm_flags & (VM_WRITE | VM_SHARED);
> + flags = (flags == (VM_WRITE | VM_SHARED) ? O_RDWR : O_RDONLY);
> + file = filp_open(fname, flags, 0);
> + if (IS_ERR(file))
> + return PTR_ERR(file);
> + break;
> +
> + default:
> + return -EINVAL;
> +
> + }
> +
> + addr = do_mmap_pgoff(file, (unsigned long) hh->vm_start,
> + vm_size, vm_prot, vm_flags, vm_pgoff);
> + cr_debug("size %#lx prot %#lx flag %#lx pgoff %#lx => %#lx\n",
> + vm_size, vm_prot, vm_flags, vm_pgoff, addr);
> +
> + /* the file (if opened) is now referenced by the vma */
> + if (file)
> + filp_close(file, NULL);
> +
> + if (IS_ERR((void*) addr))
> + return (PTR_ERR((void *) addr));
> +
> + /*
> + * CR_VMA_ANON: read in memory as is
> + * CR_VMA_FILE: read in memory as is
> + * (more to follow ...)
> + */
> +
> + switch (hh->how) {
> + case CR_VMA_ANON:
> + case CR_VMA_FILE:
> + /* standard case: read the data into the memory */
> + ret = cr_vma_read_pages(ctx, hh);
> + break;
> + }
> +
> + if (ret < 0)
> + return ret;
> +
> + if (vm_prot & PROT_EXEC)
> + flush_icache_range(hh->vm_start, hh->vm_end);
> +
> + cr_hbuf_put(ctx, sizeof(*hh));
> + cr_debug("vma retval %d\n", ret);
> + return 0;
> +}
> +
> +static int cr_destroy_mm(struct mm_struct *mm)
> +{
> + struct vm_area_struct *vmnext = mm->mmap;
> + struct vm_area_struct *vma;
> + int ret;
> +
> + while (vmnext) {
> + vma = vmnext;
> + vmnext = vmnext->vm_next;
> + ret = do_munmap(mm, vma->vm_start, vma->vm_end-vma->vm_start);
> + if (ret < 0)
> + return ret;
> + }
> + return 0;
> +}
> +
> +int cr_read_mm(struct cr_ctx *ctx)
> +{
> + struct cr_hdr_mm *hh = cr_hbuf_get(ctx, sizeof(*hh));
> + struct mm_struct *mm;
> + int nr, ret;
> +
> + ret = cr_read_obj_type(ctx, hh, sizeof(*hh), CR_HDR_MM);
> + if (ret < 0)
> + return ret;
> +#if 0 /* activate when containers are used */
> + if (ret != task_pid_vnr(current))
> + return -EINVAL;
> +#endif
> + cr_debug("map_count %d\n", hh->map_count);
> +
> + /* XXX need more sanity checks */
> + if (hh->start_code > hh->end_code ||
> + hh->start_data > hh->end_data || hh->map_count < 0)
> + return -EINVAL;
> +
> + mm = current->mm;
> +
> + /* point of no return -- destruct current mm */
> + down_write(&mm->mmap_sem);
> + ret = cr_destroy_mm(mm);
> + up_write(&mm->mmap_sem);
> +
> + if (ret < 0)
> + return ret;
> +

You should take down_write(&mm->mmap_sem) again here and hold it until all vmas
are restored, because do_mmap_pgoff() must be called with mmap_sem held for
writing. This means removing the down_write() from cr_vma_writable().
Alternatively, make the locking finer-grained: keep it released (as it is now)
before looping over the vmas, and make cr_read_vma() take it again just before
calling do_mmap_pgoff().

> + mm->start_code = hh->start_code;
> + mm->end_code = hh->end_code;
> + mm->start_data = hh->start_data;
> + mm->end_data = hh->end_data;
> + mm->start_brk = hh->start_brk;
> + mm->brk = hh->brk;
> + mm->start_stack = hh->start_stack;
> + mm->arg_start = hh->arg_start;
> + mm->arg_end = hh->arg_end;
> + mm->env_start = hh->env_start;
> + mm->env_end = hh->env_end;
> +
> + /* FIX: need also mm->flags */
> +
> + for (nr = hh->map_count; nr; nr--) {
> + ret = cr_read_vma(ctx, mm);
> + if (ret < 0)
> + return ret;
> + }
> +
> + ret = cr_read_mm_context(ctx, mm, hh->tag);
> +
> + cr_hbuf_put(ctx, sizeof(*hh));
> + return ret;
> +}

Thanks,

Louis

--
Dr Louis Rilling Kerlabs
Skype: louis.rilling Batiment Germanium
Phone: (+33|0) 6 80 89 08 23 80 avenue des Buttes de Coesmes
http://www.kerlabs.com/ 35700 Rennes

Attachment: signature.asc
Description: Digital signature