Re: [PATCH 4/4] iommu/arm-smmu-v3: Remove cmpxchg() in arm_smmu_cmdq_issue_cmdlist()
From: kernel test robot
Date: Mon Jun 22 2020 - 21:09:59 EST
Hi John,
I love your patch! Perhaps something to improve:
[auto build test WARNING on iommu/next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use as documented in
https://git-scm.com/docs/git-format-patch]
url: https://github.com/0day-ci/linux/commits/John-Garry/iommu-arm-smmu-v3-Improve-cmdq-lock-efficiency/20200623-013438
base: https://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git next
config: arm64-randconfig-c024-20200622 (attached as .config)
compiler: aarch64-linux-gcc (GCC) 9.3.0
If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@xxxxxxxxx>
All warnings (new ones prefixed by >>, old ones prefixed by <<):
In file included from include/linux/bits.h:23,
from include/linux/ioport.h:15,
from include/linux/acpi.h:12,
from drivers/iommu/arm-smmu-v3.c:12:
drivers/iommu/arm-smmu-v3.c: In function 'arm_smmu_cmdq_issue_cmdlist':
include/linux/bits.h:26:28: warning: comparison of unsigned expression < 0 is always false [-Wtype-limits]
26 | __builtin_constant_p((l) > (h)), (l) > (h), 0)))
| ^
include/linux/build_bug.h:16:62: note: in definition of macro 'BUILD_BUG_ON_ZERO'
16 | #define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); })))
| ^
include/linux/bits.h:39:3: note: in expansion of macro 'GENMASK_INPUT_CHECK'
39 | (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l))
| ^~~~~~~~~~~~~~~~~~~
>> drivers/iommu/arm-smmu-v3.c:1404:18: note: in expansion of macro 'GENMASK'
1404 | u32 prod_mask = GENMASK(cmdq->q.llq.max_n_shift, 0);
| ^~~~~~~
include/linux/bits.h:26:40: warning: comparison of unsigned expression < 0 is always false [-Wtype-limits]
26 | __builtin_constant_p((l) > (h)), (l) > (h), 0)))
| ^
include/linux/build_bug.h:16:62: note: in definition of macro 'BUILD_BUG_ON_ZERO'
16 | #define BUILD_BUG_ON_ZERO(e) ((int)(sizeof(struct { int:(-!!(e)); })))
| ^
include/linux/bits.h:39:3: note: in expansion of macro 'GENMASK_INPUT_CHECK'
39 | (GENMASK_INPUT_CHECK(h, l) + __GENMASK(h, l))
| ^~~~~~~~~~~~~~~~~~~
>> drivers/iommu/arm-smmu-v3.c:1404:18: note: in expansion of macro 'GENMASK'
1404 | u32 prod_mask = GENMASK(cmdq->q.llq.max_n_shift, 0);
| ^~~~~~~
vim +/GENMASK +1404 drivers/iommu/arm-smmu-v3.c
1369
1370 /*
1371 * This is the actual insertion function, and provides the following
1372 * ordering guarantees to callers:
1373 *
1374 * - There is a dma_wmb() before publishing any commands to the queue.
1375 * This can be relied upon to order prior writes to data structures
1376 * in memory (such as a CD or an STE) before the command.
1377 *
1378 * - On completion of a CMD_SYNC, there is a control dependency.
1379 * This can be relied upon to order subsequent writes to memory (e.g.
1380 * freeing an IOVA) after completion of the CMD_SYNC.
1381 *
1382 * - Command insertion is totally ordered, so if two CPUs each race to
1383 * insert their own list of commands then all of the commands from one
1384 * CPU will appear before any of the commands from the other CPU.
1385 *
1386 * - A CMD_SYNC is always inserted, which ensures we limit the prod pointer
1387 * for when the cmdq is full, such that we don't wrap more than twice.
1388 * It also makes it easy for the owner to know by how many to increment the
1389 * cmdq lock.
1390 */
1391 static int arm_smmu_cmdq_issue_cmdlist(struct arm_smmu_device *smmu,
1392 u64 *cmds, int n)
1393 {
1394 u64 cmd_sync[CMDQ_ENT_DWORDS];
1395 const int sync = 1;
1396 u32 prod;
1397 unsigned long flags;
1398 bool owner;
1399 struct arm_smmu_cmdq *cmdq = &smmu->cmdq;
1400 struct arm_smmu_ll_queue llq = {
1401 .max_n_shift = cmdq->q.llq.max_n_shift,
1402 }, head = llq, space = llq;
1403 u32 owner_val = 1 << cmdq->q.llq.owner_count_shift;
> 1404 u32 prod_mask = GENMASK(cmdq->q.llq.max_n_shift, 0);
1405 u32 owner_mask = GENMASK(30, cmdq->q.llq.owner_count_shift);
1406 int ret = 0;
1407
1408 /* 1. Allocate some space in the queue */
1409 local_irq_save(flags);
1410
1411 prod = atomic_fetch_add(n + sync + owner_val,
1412 &cmdq->q.llq.atomic.prod);
1413
1414 owner = !(prod & owner_mask);
1415 llq.prod = prod_mask & prod;
1416 head.prod = queue_inc_prod_n(&llq, n + sync);
1417
1418 /*
1419 * Ensure it's safe to write the entries. For this, we need to ensure
1420 * that there is space in the queue from our prod pointer.
1421 */
1422 space.cons = READ_ONCE(cmdq->q.llq.cons);
1423 space.prod = llq.prod;
1424 while (!queue_has_space(&space, n + sync)) {
1425 if (arm_smmu_cmdq_poll_until_not_full(smmu, &space))
1426 dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
1427
1428 space.prod = llq.prod;
1429 }
1430
1431 /*
1432 * 2. Write our commands into the queue
1433 * Dependency ordering from the space-checking while loop, above.
1434 */
1435 arm_smmu_cmdq_write_entries(cmdq, cmds, llq.prod, n);
1436
1437 prod = queue_inc_prod_n(&llq, n);
1438 arm_smmu_cmdq_build_sync_cmd(cmd_sync, smmu, prod);
1439 queue_write(Q_ENT(&cmdq->q, prod), cmd_sync, CMDQ_ENT_DWORDS);
1440
1441 /* 3. Mark our slots as valid, ensuring commands are visible first */
1442 dma_wmb();
1443 arm_smmu_cmdq_set_valid_map(cmdq, llq.prod, head.prod);
1444
1445 /* 4. If we are the owner, take control of the SMMU hardware */
1446 if (owner) {
1447 int owner_count;
1448 u32 prod_tmp;
1449
1450 /* a. Wait for previous owner to finish */
1451 atomic_cond_read_relaxed(&cmdq->owner_prod, VAL == llq.prod);
1452
1453 /* b. Stop gathering work by clearing the owned mask */
1454 prod_tmp = atomic_fetch_andnot_relaxed(~prod_mask,
1455 &cmdq->q.llq.atomic.prod);
1456 prod = prod_tmp & prod_mask;
1457 owner_count = prod_tmp & owner_mask;
1458 owner_count >>= cmdq->q.llq.owner_count_shift;
1459
1460 /*
1461 * c. Wait for any gathered work to be written to the queue.
1462 * Note that we read our own entries so that we have the control
1463 * dependency required by (d).
1464 */
1465 arm_smmu_cmdq_poll_valid_map(cmdq, llq.prod, prod);
1466
1467 /*
1468 * In order to determine completion of the CMD_SYNCs, we must
1469 * ensure that the queue can't wrap twice without us noticing.
1470 * We achieve that by taking the cmdq lock as shared before
1471 * progressing the prod pointer.
1472 * The owner does this for all the non-owners it has gathered.
1473 * Otherwise, some non-owner(s) may lock the cmdq, blocking
1474 * cons being updating. This could be when the cmdq has just
1475 * become full. In this case, other sibling non-owners could be
1476 * blocked from progressing, leading to deadlock.
1477 */
1478 arm_smmu_cmdq_shared_lock(cmdq, owner_count);
1479
1480 /*
1481 * d. Advance the hardware prod pointer
1482 * Control dependency ordering from the entries becoming valid.
1483 */
1484 writel_relaxed(prod, cmdq->q.prod_reg);
1485
1486 /*
1487 * e. Tell the next owner we're done
1488 * Make sure we've updated the hardware first, so that we don't
1489 * race to update prod and potentially move it backwards.
1490 */
1491 atomic_set_release(&cmdq->owner_prod, prod);
1492 }
1493
1494 /* 5. Since we always insert a CMD_SYNC, we must wait for it to complete */
1495 llq.prod = queue_inc_prod_n(&llq, n);
1496 ret = arm_smmu_cmdq_poll_until_sync(smmu, &llq);
1497 if (ret)
1498 dev_err_ratelimited(smmu->dev, "CMD_SYNC timeout at 0x%08x [hwprod 0x%08x, hwcons 0x%08x]\n",
1499 llq.prod, readl_relaxed(cmdq->q.prod_reg),
1500 readl_relaxed(cmdq->q.cons_reg));
1501
1502 /*
1503 * Try to unlock the cmdq lock. This will fail if we're the last reader,
1504 * in which case we can safely update cmdq->q.llq.cons
1505 */
1506 if (!arm_smmu_cmdq_shared_tryunlock(cmdq)) {
1507 WRITE_ONCE(cmdq->q.llq.cons, llq.cons);
1508 arm_smmu_cmdq_shared_unlock(cmdq);
1509 }
1510
1511 local_irq_restore(flags);
1512 return ret;
1513 }
1514
---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@xxxxxxxxxxxx
Attachment:
.config.gz
Description: application/gzip