Re: [PATCH 2/2] gpu/radeon: use HMM mirror for userptr buffer object.

From: kbuild test robot
Date: Mon Sep 10 2018 - 14:35:41 EST


Hi Jérôme,

I love your patch! Yet something to improve:

[auto build test ERROR on linus/master]
[also build test ERROR on v4.19-rc3 next-20180910]
[if your patch is applied to the wrong git tree, please drop us a note to help improve the system]

url: https://github.com/0day-ci/linux/commits/jglisse-redhat-com/Getting-rid-of-GUP-and-use-HMM-for-user-ptr-features/20180911-020741
config: x86_64-randconfig-x017-201836 (attached as .config)
compiler: gcc-7 (Debian 7.3.0-1) 7.3.0
reproduce:
# save the attached .config to linux build tree
make ARCH=x86_64

All errors (new ones prefixed by >>):

drivers/gpu/drm/radeon/radeon_mn.c:43:20: error: field 'mirror' has incomplete type
struct hmm_mirror mirror;
^~~~~~
drivers/gpu/drm/radeon/radeon_mn.c: In function 'radeon_mn_destroy':
drivers/gpu/drm/radeon/radeon_mn.c:90:2: error: implicit declaration of function 'hmm_mirror_unregister'; did you mean 'drm_dp_aux_unregister'? [-Werror=implicit-function-declaration]
hmm_mirror_unregister(&rmn->mirror);
^~~~~~~~~~~~~~~~~~~~~
drm_dp_aux_unregister
In file included from include/linux/firmware.h:6:0,
from drivers/gpu/drm/radeon/radeon_mn.c:31:
drivers/gpu/drm/radeon/radeon_mn.c: In function 'radeon_mirror_release':
include/linux/kernel.h:997:32: error: dereferencing pointer to incomplete type 'struct hmm_mirror'
BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) && \
^~~~~~
include/linux/compiler.h:335:18: note: in definition of macro '__compiletime_assert'
int __cond = !(condition); \
^~~~~~~~~
include/linux/compiler.h:358:2: note: in expansion of macro '_compiletime_assert'
_compiletime_assert(condition, msg, __compiletime_assert_, __LINE__)
^~~~~~~~~~~~~~~~~~~
include/linux/build_bug.h:45:37: note: in expansion of macro 'compiletime_assert'
#define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
^~~~~~~~~~~~~~~~~~
include/linux/kernel.h:997:2: note: in expansion of macro 'BUILD_BUG_ON_MSG'
BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) && \
^~~~~~~~~~~~~~~~
include/linux/kernel.h:997:20: note: in expansion of macro '__same_type'
BUILD_BUG_ON_MSG(!__same_type(*(ptr), ((type *)0)->member) && \
^~~~~~~~~~~
drivers/gpu/drm/radeon/radeon_mn.c:103:26: note: in expansion of macro 'container_of'
struct radeon_mn *rmn = container_of(mirror, struct radeon_mn, mirror);
^~~~~~~~~~~~
drivers/gpu/drm/radeon/radeon_mn.c: At top level:
drivers/gpu/drm/radeon/radeon_mn.c:119:24: warning: 'struct hmm_update' declared inside parameter list will not be visible outside of this definition or declaration
const struct hmm_update *update)
^~~~~~~~~~
drivers/gpu/drm/radeon/radeon_mn.c: In function 'radeon_sync_cpu_device_pagetables':
drivers/gpu/drm/radeon/radeon_mn.c:128:14: error: dereferencing pointer to incomplete type 'const struct hmm_update'
end = update->end - 1;
^~
drivers/gpu/drm/radeon/radeon_mn.c: At top level:
drivers/gpu/drm/radeon/radeon_mn.c:183:21: error: variable 'radeon_mirror_ops' has initializer but incomplete type
static const struct hmm_mirror_ops radeon_mirror_ops = {
^~~~~~~~~~~~~~
drivers/gpu/drm/radeon/radeon_mn.c:184:3: error: 'const struct hmm_mirror_ops' has no member named 'sync_cpu_device_pagetables'
.sync_cpu_device_pagetables = &radeon_sync_cpu_device_pagetables,
^~~~~~~~~~~~~~~~~~~~~~~~~~
drivers/gpu/drm/radeon/radeon_mn.c:184:32: warning: excess elements in struct initializer
.sync_cpu_device_pagetables = &radeon_sync_cpu_device_pagetables,
^
drivers/gpu/drm/radeon/radeon_mn.c:184:32: note: (near initialization for 'radeon_mirror_ops')
drivers/gpu/drm/radeon/radeon_mn.c:185:3: error: 'const struct hmm_mirror_ops' has no member named 'release'
.release = &radeon_mirror_release,
^~~~~~~
drivers/gpu/drm/radeon/radeon_mn.c:185:13: warning: excess elements in struct initializer
.release = &radeon_mirror_release,
^
drivers/gpu/drm/radeon/radeon_mn.c:185:13: note: (near initialization for 'radeon_mirror_ops')
drivers/gpu/drm/radeon/radeon_mn.c: In function 'radeon_mn_get':
drivers/gpu/drm/radeon/radeon_mn.c:224:6: error: implicit declaration of function 'hmm_mirror_register'; did you mean 'drm_dp_aux_register'? [-Werror=implicit-function-declaration]
r = hmm_mirror_register(&new->mirror, mm);
^~~~~~~~~~~~~~~~~~~
drm_dp_aux_register
drivers/gpu/drm/radeon/radeon_mn.c: In function 'radeon_mn_bo_map':
>> drivers/gpu/drm/radeon/radeon_mn.c:373:43: error: 'HMM_PFN_FLAG_MAX' undeclared (first use in this function); did you mean 'TTM_PL_FLAG_VRAM'?
static const uint64_t radeon_range_flags[HMM_PFN_FLAG_MAX] = {
^~~~~~~~~~~~~~~~
TTM_PL_FLAG_VRAM
drivers/gpu/drm/radeon/radeon_mn.c:373:43: note: each undeclared identifier is reported only once for each function it appears in
>> drivers/gpu/drm/radeon/radeon_mn.c:378:44: error: 'HMM_PFN_VALUE_MAX' undeclared (first use in this function); did you mean 'HMM_PFN_FLAG_MAX'?
static const uint64_t radeon_range_values[HMM_PFN_VALUE_MAX] = {
^~~~~~~~~~~~~~~~~
HMM_PFN_FLAG_MAX
>> drivers/gpu/drm/radeon/radeon_mn.c:389:19: error: storage size of 'range' isn't known
struct hmm_range range;
^~~~~
>> drivers/gpu/drm/radeon/radeon_mn.c:421:31: error: 'HMM_PFN_VALID' undeclared (first use in this function); did you mean 'HMM_PFN_VALUE_MAX'?
range.pfns[i] = range.flags[HMM_PFN_VALID];
^~~~~~~~~~~~~
HMM_PFN_VALUE_MAX
>> drivers/gpu/drm/radeon/radeon_mn.c:422:40: error: 'HMM_PFN_WRITE' undeclared (first use in this function); did you mean 'HMM_PFN_VALID'?
range.pfns[i] |= write ? range.flags[HMM_PFN_WRITE] : 0;
^~~~~~~~~~~~~
HMM_PFN_VALID
>> drivers/gpu/drm/radeon/radeon_mn.c:425:8: error: implicit declaration of function 'hmm_vma_fault'; did you mean 'filemap_fault'? [-Werror=implicit-function-declaration]
ret = hmm_vma_fault(&range, true);
^~~~~~~~~~~~~
filemap_fault
>> drivers/gpu/drm/radeon/radeon_mn.c:430:23: error: implicit declaration of function 'hmm_pfn_to_page'; did you mean '__pfn_to_page'? [-Werror=implicit-function-declaration]
struct page *page = hmm_pfn_to_page(&range, range.pfns[i]);
^~~~~~~~~~~~~~~
__pfn_to_page
>> drivers/gpu/drm/radeon/radeon_mn.c:446:4: error: implicit declaration of function 'hmm_vma_range_done'; did you mean 'drm_vma_node_size'? [-Werror=implicit-function-declaration]
hmm_vma_range_done(&range);
^~~~~~~~~~~~~~~~~~
drm_vma_node_size
drivers/gpu/drm/radeon/radeon_mn.c:389:19: warning: unused variable 'range' [-Wunused-variable]
struct hmm_range range;
^~~~~
drivers/gpu/drm/radeon/radeon_mn.c:378:24: warning: unused variable 'radeon_range_values' [-Wunused-variable]
static const uint64_t radeon_range_values[HMM_PFN_VALUE_MAX] = {
^~~~~~~~~~~~~~~~~~~
drivers/gpu/drm/radeon/radeon_mn.c:373:24: warning: unused variable 'radeon_range_flags' [-Wunused-variable]
static const uint64_t radeon_range_flags[HMM_PFN_FLAG_MAX] = {
^~~~~~~~~~~~~~~~~~
drivers/gpu/drm/radeon/radeon_mn.c: At top level:
drivers/gpu/drm/radeon/radeon_mn.c:183:36: error: storage size of 'radeon_mirror_ops' isn't known
static const struct hmm_mirror_ops radeon_mirror_ops = {
^~~~~~~~~~~~~~~~~
cc1: some warnings being treated as errors

vim +373 drivers/gpu/drm/radeon/radeon_mn.c

182
183 static const struct hmm_mirror_ops radeon_mirror_ops = {
184 .sync_cpu_device_pagetables = &radeon_sync_cpu_device_pagetables,
> 185 .release = &radeon_mirror_release,
186 };
187
188 /**
189 * radeon_mn_get - create notifier context
190 *
191 * @rdev: radeon device pointer
192 *
193 * Creates a notifier context for current->mm.
194 */
195 static struct radeon_mn *radeon_mn_get(struct radeon_device *rdev)
196 {
197 struct mm_struct *mm = current->mm;
198 struct radeon_mn *rmn, *new;
199 int r;
200
201 mutex_lock(&rdev->mn_lock);
202 hash_for_each_possible(rdev->mn_hash, rmn, node, (unsigned long)mm) {
203 if (rmn->mm == mm) {
204 mutex_unlock(&rdev->mn_lock);
205 return rmn;
206 }
207 }
208 mutex_unlock(&rdev->mn_lock);
209
210 new = kzalloc(sizeof(*rmn), GFP_KERNEL);
211 if (!new) {
212 return ERR_PTR(-ENOMEM);
213 }
214 new->mm = mm;
215 new->rdev = rdev;
216 mutex_init(&new->lock);
217 new->objects = RB_ROOT_CACHED;
218 new->mirror.ops = &radeon_mirror_ops;
219
220 if (down_write_killable(&mm->mmap_sem)) {
221 kfree(new);
222 return ERR_PTR(-EINTR);
223 }
224 r = hmm_mirror_register(&new->mirror, mm);
225 up_write(&mm->mmap_sem);
226 if (r) {
227 kfree(new);
228 return ERR_PTR(r);
229 }
230
231 mutex_lock(&rdev->mn_lock);
232 /* Check again in case some other thread raced with us ... */
233 hash_for_each_possible(rdev->mn_hash, rmn, node, (unsigned long)mm) {
234 if (rmn->mm == mm) {
235 mutex_unlock(&rdev->mn_lock);
236 hmm_mirror_unregister(&new->mirror);
237 kfree(new);
238 return rmn;
239 }
240 }
241 hash_add(rdev->mn_hash, &new->node, (unsigned long)mm);
242 mutex_unlock(&rdev->mn_lock);
243
244 return new;
245 }
246
247 /**
248 * radeon_mn_register - register a BO for notifier updates
249 *
250 * @bo: radeon buffer object
251 * @addr: userptr addr we should monitor
252 *
253 * Registers an MMU notifier for the given BO at the specified address.
254 * Returns 0 on success, -ERRNO if anything goes wrong.
255 */
256 int radeon_mn_register(struct radeon_bo *bo, unsigned long addr)
257 {
258 unsigned long end = addr + radeon_bo_size(bo) - 1;
259 struct radeon_device *rdev = bo->rdev;
260 struct radeon_mn *rmn;
261 struct radeon_mn_node *node = NULL;
262 struct list_head bos;
263 struct interval_tree_node *it;
264
265 bo->userptr = addr;
266 bo->pfns = kvmalloc_array(bo->tbo.num_pages, sizeof(uint64_t),
267 GFP_KERNEL | __GFP_ZERO);
268 if (bo->pfns == NULL)
269 return -ENOMEM;
270
271 rmn = radeon_mn_get(rdev);
272 if (IS_ERR(rmn)) {
273 kvfree(bo->pfns);
274 bo->pfns = NULL;
275 return PTR_ERR(rmn);
276 }
277
278 INIT_LIST_HEAD(&bos);
279
280 mutex_lock(&rmn->lock);
281
282 while ((it = interval_tree_iter_first(&rmn->objects, addr, end))) {
283 kfree(node);
284 node = container_of(it, struct radeon_mn_node, it);
285 interval_tree_remove(&node->it, &rmn->objects);
286 addr = min(it->start, addr);
287 end = max(it->last, end);
288 list_splice(&node->bos, &bos);
289 }
290
291 if (!node) {
292 node = kmalloc(sizeof(struct radeon_mn_node), GFP_KERNEL);
293 if (!node) {
294 mutex_unlock(&rmn->lock);
295 kvfree(bo->pfns);
296 bo->pfns = NULL;
297 return -ENOMEM;
298 }
299 }
300
301 bo->mn = rmn;
302
303 node->it.start = addr;
304 node->it.last = end;
305 INIT_LIST_HEAD(&node->bos);
306 list_splice(&bos, &node->bos);
307 list_add(&bo->mn_list, &node->bos);
308
309 interval_tree_insert(&node->it, &rmn->objects);
310
311 mutex_unlock(&rmn->lock);
312
313 return 0;
314 }
315
316 /**
317 * radeon_mn_unregister - unregister a BO for notifier updates
318 *
319 * @bo: radeon buffer object
320 *
321 * Remove any registration of MMU notifier updates from the buffer object.
322 */
323 void radeon_mn_unregister(struct radeon_bo *bo)
324 {
325 struct radeon_device *rdev = bo->rdev;
326 struct radeon_mn *rmn;
327 struct list_head *head;
328
329 mutex_lock(&rdev->mn_lock);
330 rmn = bo->mn;
331 if (rmn == NULL) {
332 mutex_unlock(&rdev->mn_lock);
333 return;
334 }
335
336 mutex_lock(&rmn->lock);
337 /* save the next list entry for later */
338 head = bo->mn_list.next;
339
340 bo->mn = NULL;
341 list_del(&bo->mn_list);
342
343 if (list_empty(head)) {
344 struct radeon_mn_node *node;
345 node = container_of(head, struct radeon_mn_node, bos);
346 interval_tree_remove(&node->it, &rmn->objects);
347 kfree(node);
348 }
349
350 mutex_unlock(&rmn->lock);
351 mutex_unlock(&rdev->mn_lock);
352
353 kvfree(bo->pfns);
354 bo->pfns = NULL;
355 }
356
357 /**
358 * radeon_mn_bo_map - map range of virtual address as buffer object
359 *
360 * @bo: radeon buffer object
361 * @ttm: ttm_tt object in which holds mirroring result
362 * @write: can GPU write to the range ?
363 * Returns: 0 on success, error code otherwise
364 *
365 * Use HMM to mirror a range of virtual address as a buffer object mapped into
366 * GPU address space (thus allowing transparent GPU access to this range). It
367 * does not pin pages for range but rely on HMM and underlying synchronizations
368 * to make sure that both CPU and GPU points to same physical memory for the
369 * range.
370 */
371 int radeon_mn_bo_map(struct radeon_bo *bo, struct ttm_dma_tt *dma, bool write)
372 {
> 373 static const uint64_t radeon_range_flags[HMM_PFN_FLAG_MAX] = {
374 (1 << 0), /* HMM_PFN_VALID */
375 (1 << 1), /* HMM_PFN_WRITE */
376 0 /* HMM_PFN_DEVICE_PRIVATE */
377 };
> 378 static const uint64_t radeon_range_values[HMM_PFN_VALUE_MAX] = {
379 0xfffffffffffffffeUL, /* HMM_PFN_ERROR */
380 0, /* HMM_PFN_NONE */
381 0xfffffffffffffffcUL /* HMM_PFN_SPECIAL */
382 };
383
384 unsigned long i, npages = bo->tbo.num_pages;
385 enum dma_data_direction direction = write ?
386 DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
387 struct radeon_device *rdev = bo->rdev;
388 struct ttm_tt *ttm = &dma->ttm;
> 389 struct hmm_range range;
390 struct radeon_mn *rmn;
391 int ret;
392
393 /*
394 * FIXME This whole protection shouldn't be needed as we should only
395 * reach that code with a valid reserved bo that can not under go a
396 * concurrent radeon_mn_unregister().
397 */
398 mutex_lock(&rdev->mn_lock);
399 if (bo->mn == NULL) {
400 mutex_unlock(&rdev->mn_lock);
401 return -EINVAL;
402 }
403 rmn = bo->mn;
404 mutex_unlock(&rdev->mn_lock);
405
406 range.pfn_shift = 12;
407 range.pfns = bo->pfns;
408 range.start = bo->userptr;
409 range.flags = radeon_range_flags;
410 range.values = radeon_range_values;
411 range.end = bo->userptr + radeon_bo_size(bo);
412
413 range.vma = find_vma(rmn->mm, bo->userptr);
414 if (!range.vma || range.vma->vm_file || range.vma->vm_end < range.end)
415 return -EPERM;
416
417 memset(ttm->pages, 0, sizeof(void*) * npages);
418
419 again:
420 for (i = 0; i < npages; ++i) {
> 421 range.pfns[i] = range.flags[HMM_PFN_VALID];
> 422 range.pfns[i] |= write ? range.flags[HMM_PFN_WRITE] : 0;
423 }
424
> 425 ret = hmm_vma_fault(&range, true);
426 if (ret)
427 goto err_unmap;
428
429 for (i = 0; i < npages; ++i) {
> 430 struct page *page = hmm_pfn_to_page(&range, range.pfns[i]);
431
432 if (page == NULL)
433 goto again;
434
435 if (ttm->pages[i] == page)
436 continue;
437
438 if (ttm->pages[i])
439 dma_unmap_page(rdev->dev, dma->dma_address[i],
440 PAGE_SIZE, direction);
441 ttm->pages[i] = page;
442
443 dma->dma_address[i] = dma_map_page(rdev->dev, page, 0,
444 PAGE_SIZE, direction);
445 if (dma_mapping_error(rdev->dev, dma->dma_address[i])) {
> 446 hmm_vma_range_done(&range);
447 ttm->pages[i] = NULL;
448 ret = -ENOMEM;
449 goto err_unmap;
450 }
451 }
452
453 /*
454 * Taking rmn->lock is not necessary here as we are protected from any
455 * concurrent invalidation through ttm object reservation. Involved
456 * functions: radeon_sync_cpu_device_pagetables()
457 * radeon_bo_list_validate()
458 * radeon_gem_userptr_ioctl()
459 */
460 if (!hmm_vma_range_done(&range))
461 goto again;
462
463 return 0;
464
465 err_unmap:
466 radeon_mn_bo_unmap(bo, dma, write);
467 return ret;
468 }
469

---
0-DAY kernel test infrastructure Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all Intel Corporation

Attachment: .config.gz
Description: application/gzip