Re: [Intel-gfx] [RFC 06/17] drm: i915: fix sg_table nents vs. orig_nents misuse

From: Marek Szyprowski
Date: Thu Apr 30 2020 - 10:18:05 EST


Hi

On 28.04.2020 16:27, Tvrtko Ursulin wrote:
>
> On 28/04/2020 14:19, Marek Szyprowski wrote:
>> The Documentation/DMA-API-HOWTO.txt states that dma_map_sg returns the
>> numer of the created entries in the DMA address space. However the
>> subsequent calls to dma_sync_sg_for_{device,cpu} and dma_unmap_sg
>> must be
>> called with the original number of entries passed to dma_map_sg. The
>> sg_table->nents in turn holds the result of the dma_map_sg call as
>> stated
>> in include/linux/scatterlist.h. Adapt the code to obey those rules.
>>
>> Signed-off-by: Marek Szyprowski <m.szyprowski@xxxxxxxxxxx>
>> ---
>> Â drivers/gpu/drm/i915/gem/i915_gem_dmabuf.cÂÂÂÂÂÂ | 13 +++++++------
>> Â drivers/gpu/drm/i915/gem/i915_gem_internal.cÂÂÂÂ |Â 4 ++--
>> Â drivers/gpu/drm/i915/gem/i915_gem_region.cÂÂÂÂÂÂ |Â 4 ++--
>> Â drivers/gpu/drm/i915/gem/i915_gem_shmem.cÂÂÂÂÂÂÂ |Â 5 +++--
>>  drivers/gpu/drm/i915/gem/selftests/huge_pages.c | 10 +++++-----
>> Â drivers/gpu/drm/i915/gem/selftests/mock_dmabuf.c |Â 5 +++--
>> Â drivers/gpu/drm/i915/gt/intel_ggtt.cÂÂÂÂÂÂÂÂÂÂÂÂ | 12 ++++++------
>> Â drivers/gpu/drm/i915/i915_gem_gtt.cÂÂÂÂÂÂÂÂÂÂÂÂÂ | 12 +++++++-----
>> Â drivers/gpu/drm/i915/i915_scatterlist.cÂÂÂÂÂÂÂÂÂ |Â 4 ++--
>> Â drivers/gpu/drm/i915/selftests/scatterlist.cÂÂÂÂ |Â 8 ++++----
>> Â 10 files changed, 41 insertions(+), 36 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
>> b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
>> index 7db5a79..d829852 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
>> @@ -36,21 +36,22 @@ static struct sg_table
>> *i915_gem_map_dma_buf(struct dma_buf_attachment *attachme
>> ÂÂÂÂÂÂÂÂÂ goto err_unpin_pages;
>> ÂÂÂÂÂ }
>> Â -ÂÂÂ ret = sg_alloc_table(st, obj->mm.pages->nents, GFP_KERNEL);
>> +ÂÂÂ ret = sg_alloc_table(st, obj->mm.pages->orig_nents, GFP_KERNEL);
>> ÂÂÂÂÂ if (ret)
>> ÂÂÂÂÂÂÂÂÂ goto err_free;
>> Â ÂÂÂÂÂ src = obj->mm.pages->sgl;
>> ÂÂÂÂÂ dst = st->sgl;
>> -ÂÂÂ for (i = 0; i < obj->mm.pages->nents; i++) {
>> +ÂÂÂ for (i = 0; i < obj->mm.pages->orig_nents; i++) {
>> ÂÂÂÂÂÂÂÂÂ sg_set_page(dst, sg_page(src), src->length, 0);
>> ÂÂÂÂÂÂÂÂÂ dst = sg_next(dst);
>> ÂÂÂÂÂÂÂÂÂ src = sg_next(src);
>> ÂÂÂÂÂ }
>> Â -ÂÂÂ if (!dma_map_sg_attrs(attachment->dev,
>> -ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ st->sgl, st->nents, dir,
>> -ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ DMA_ATTR_SKIP_CPU_SYNC)) {
>> +ÂÂÂ st->nents = dma_map_sg_attrs(attachment->dev,
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ st->sgl, st->orig_nents, dir,
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ DMA_ATTR_SKIP_CPU_SYNC);
>> +ÂÂÂ if (!st->nents) {
>> ÂÂÂÂÂÂÂÂÂ ret = -ENOMEM;
>> ÂÂÂÂÂÂÂÂÂ goto err_free_sg;
>> ÂÂÂÂÂ }
>> @@ -74,7 +75,7 @@ static void i915_gem_unmap_dma_buf(struct
>> dma_buf_attachment *attachment,
>> ÂÂÂÂÂ struct drm_i915_gem_object *obj =
>> dma_buf_to_obj(attachment->dmabuf);
>> Â ÂÂÂÂÂ dma_unmap_sg_attrs(attachment->dev,
>> -ÂÂÂÂÂÂÂÂÂÂÂÂÂÂ sg->sgl, sg->nents, dir,
>> +ÂÂÂÂÂÂÂÂÂÂÂÂÂÂ sg->sgl, sg->orig_nents, dir,
>> ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ DMA_ATTR_SKIP_CPU_SYNC);
>> ÂÂÂÂÂ sg_free_table(sg);
>> ÂÂÂÂÂ kfree(sg);
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
>> b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
>> index cbbff81..a8ebfdd 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
>> @@ -73,7 +73,7 @@ static int
>> i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
>> ÂÂÂÂÂ }
>> Â ÂÂÂÂÂ sg = st->sgl;
>> -ÂÂÂ st->nents = 0;
>> +ÂÂÂ st->nents = st->orig_nents = 0;
>> ÂÂÂÂÂ sg_page_sizes = 0;
>> Â ÂÂÂÂÂ do {
>> @@ -94,7 +94,7 @@ static int
>> i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
>> Â ÂÂÂÂÂÂÂÂÂ sg_set_page(sg, page, PAGE_SIZE << order, 0);
>> ÂÂÂÂÂÂÂÂÂ sg_page_sizes |= PAGE_SIZE << order;
>> -ÂÂÂÂÂÂÂ st->nents++;
>> +ÂÂÂÂÂÂÂ st->nents = st->orig_nents = st->nents + 1;
>> Â ÂÂÂÂÂÂÂÂÂ npages -= 1 << order;
>> ÂÂÂÂÂÂÂÂÂ if (!npages) {
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c
>> b/drivers/gpu/drm/i915/gem/i915_gem_region.c
>> index 1515384..58ca560 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_region.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c
>> @@ -53,7 +53,7 @@
>> ÂÂÂÂÂ GEM_BUG_ON(list_empty(blocks));
>> Â ÂÂÂÂÂ sg = st->sgl;
>> -ÂÂÂ st->nents = 0;
>> +ÂÂÂ st->nents = st->orig_nents = 0;
>> ÂÂÂÂÂ sg_page_sizes = 0;
>> ÂÂÂÂÂ prev_end = (resource_size_t)-1;
>> Â @@ -78,7 +78,7 @@
>> Â ÂÂÂÂÂÂÂÂÂÂÂÂÂ sg->length = block_size;
>> Â -ÂÂÂÂÂÂÂÂÂÂÂ st->nents++;
>> +ÂÂÂÂÂÂÂÂÂÂÂ st->nents = st->orig_nents = st->nents + 1;
>> ÂÂÂÂÂÂÂÂÂ } else {
>> ÂÂÂÂÂÂÂÂÂÂÂÂÂ sg->length += block_size;
>> ÂÂÂÂÂÂÂÂÂÂÂÂÂ sg_dma_len(sg) += block_size;
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
>> b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
>> index 5d5d7ee..851a732 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_shmem.c
>> @@ -80,7 +80,7 @@ static int shmem_get_pages(struct
>> drm_i915_gem_object *obj)
>> ÂÂÂÂÂ noreclaim |= __GFP_NORETRY | __GFP_NOWARN;
>> Â ÂÂÂÂÂ sg = st->sgl;
>> -ÂÂÂ st->nents = 0;
>> +ÂÂÂ st->nents = st->orig_nents = 0;
>> ÂÂÂÂÂ sg_page_sizes = 0;
>> ÂÂÂÂÂ for (i = 0; i < page_count; i++) {
>> ÂÂÂÂÂÂÂÂÂ const unsigned int shrink[] = {
>> @@ -140,7 +140,8 @@ static int shmem_get_pages(struct
>> drm_i915_gem_object *obj)
>> ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ sg_page_sizes |= sg->length;
>> ÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂÂ sg = sg_next(sg);
>> ÂÂÂÂÂÂÂÂÂÂÂÂÂ }
>> -ÂÂÂÂÂÂÂÂÂÂÂ st->nents++;
>> +ÂÂÂÂÂÂÂÂÂÂÂ st->nents = st->orig_nents = st->nents + 1;
>
> A bit higher up, not shown in the patch, we have allocated a table via
> sg_alloc_table giving it a pessimistic max nents, sometimes much
> larger than the st->nents this loops will create. But orig_nents has
> been now been overwritten. Will that leak memory come sg_table_free?

Indeed this will leak memory. I'm missed that sg_trim() will adjust
nents and orig_nents.

I will drop those changes as they are only a creative (or hacky) way of
using sg_table and scatterlists.

> As minimum it will nerf our i915_sg_trim optimization a bit lower
> down, also not shown in the diff.
>
> > [...]

Best regards
--
Marek Szyprowski, PhD
Samsung R&D Institute Poland