Re: [PATCH 03/13] MM: reclaim mustn't enter FS for swap-over-NFS

From: kernel test robot
Date: Wed Nov 17 2021 - 20:43:34 EST


Hi NeilBrown,

Thank you for the patch! However, there is something to improve:

[auto build test ERROR on v5.16-rc1]
[also build test ERROR on next-20211117]
[cannot apply to trondmy-nfs/linux-next hnaz-mm/master rostedt-trace/for-next]
[If your patch is applied to the wrong git tree, kindly drop us a note.
When submitting a patch, we suggest using '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url: https://github.com/0day-ci/linux/commits/NeilBrown/Repair-SWAP-over-NFS/20211116-104822
base: fa55b7dcdc43c1aa1ba12bca9d2dd4318c2a0dbf
config: mips-randconfig-r031-20211116 (attached as .config)
compiler: mipsel-linux-gcc (GCC) 11.2.0
reproduce (this is a W=1 build):
wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
chmod +x ~/bin/make.cross
# https://github.com/0day-ci/linux/commit/b2f1d12df57f816d09ef57fa73758fec820a23f1
git remote add linux-review https://github.com/0day-ci/linux
git fetch --no-tags linux-review NeilBrown/Repair-SWAP-over-NFS/20211116-104822
git checkout b2f1d12df57f816d09ef57fa73758fec820a23f1
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-11.2.0 make.cross ARCH=mips

If you fix the issue, kindly add the following tag as appropriate:
Reported-by: kernel test robot <lkp@xxxxxxxxx>

All errors (new ones prefixed by >>):

In file included from <command-line>:
mm/vmscan.c: In function 'shrink_page_list':
>> mm/vmscan.c:1522:37: error: implicit declaration of function 'page_swap_info'; did you mean 'swp_swap_info'? [-Werror=implicit-function-declaration]
1522 | !data_race(page_swap_info(page)->flags & SWP_FS_OPS) &&
| ^~~~~~~~~~~~~~
include/linux/compiler_types.h:291:27: note: in definition of macro '__unqual_scalar_typeof'
291 | _Generic((x), \
| ^
mm/vmscan.c:1522:27: note: in expansion of macro 'data_race'
1522 | !data_race(page_swap_info(page)->flags & SWP_FS_OPS) &&
| ^~~~~~~~~
>> mm/vmscan.c:1522:57: error: invalid type argument of '->' (have 'int')
1522 | !data_race(page_swap_info(page)->flags & SWP_FS_OPS) &&
| ^~
include/linux/compiler_types.h:291:27: note: in definition of macro '__unqual_scalar_typeof'
291 | _Generic((x), \
| ^
mm/vmscan.c:1522:27: note: in expansion of macro 'data_race'
1522 | !data_race(page_swap_info(page)->flags & SWP_FS_OPS) &&
| ^~~~~~~~~
In file included from arch/mips/include/asm/bug.h:5,
from include/linux/bug.h:5,
from include/linux/mmdebug.h:5,
from include/linux/mm.h:9,
from mm/vmscan.c:15:
>> mm/vmscan.c:1522:57: error: invalid type argument of '->' (have 'int')
1522 | !data_race(page_swap_info(page)->flags & SWP_FS_OPS) &&
| ^~
include/linux/compiler.h:218:17: note: in definition of macro 'data_race'
218 | expr; \
| ^~~~
In file included from <command-line>:
mm/vmscan.c:1692:68: error: invalid type argument of '->' (have 'int')
1692 | !data_race(page_swap_info(page)->flags & SWP_FS_OPS))
| ^~
include/linux/compiler_types.h:291:27: note: in definition of macro '__unqual_scalar_typeof'
291 | _Generic((x), \
| ^
mm/vmscan.c:1692:38: note: in expansion of macro 'data_race'
1692 | !data_race(page_swap_info(page)->flags & SWP_FS_OPS))
| ^~~~~~~~~
In file included from arch/mips/include/asm/bug.h:5,
from include/linux/bug.h:5,
from include/linux/mmdebug.h:5,
from include/linux/mm.h:9,
from mm/vmscan.c:15:
mm/vmscan.c:1692:68: error: invalid type argument of '->' (have 'int')
1692 | !data_race(page_swap_info(page)->flags & SWP_FS_OPS))
| ^~
include/linux/compiler.h:218:17: note: in definition of macro 'data_race'
218 | expr; \
| ^~~~
cc1: some warnings being treated as errors


vim +1522 mm/vmscan.c

1466
1467 /*
1468 * shrink_page_list() returns the number of reclaimed pages
1469 */
1470 static unsigned int shrink_page_list(struct list_head *page_list,
1471 struct pglist_data *pgdat,
1472 struct scan_control *sc,
1473 struct reclaim_stat *stat,
1474 bool ignore_references)
1475 {
1476 LIST_HEAD(ret_pages);
1477 LIST_HEAD(free_pages);
1478 LIST_HEAD(demote_pages);
1479 unsigned int nr_reclaimed = 0;
1480 unsigned int pgactivate = 0;
1481 bool do_demote_pass;
1482
1483 memset(stat, 0, sizeof(*stat));
1484 cond_resched();
1485 do_demote_pass = can_demote(pgdat->node_id, sc);
1486
1487 retry:
1488 while (!list_empty(page_list)) {
1489 struct address_space *mapping;
1490 struct page *page;
1491 enum page_references references = PAGEREF_RECLAIM;
1492 bool dirty, writeback, may_enter_fs;
1493 unsigned int nr_pages;
1494
1495 cond_resched();
1496
1497 page = lru_to_page(page_list);
1498 list_del(&page->lru);
1499
1500 if (!trylock_page(page))
1501 goto keep;
1502
1503 VM_BUG_ON_PAGE(PageActive(page), page);
1504
1505 nr_pages = compound_nr(page);
1506
1507 /* Account the number of base pages even though THP */
1508 sc->nr_scanned += nr_pages;
1509
1510 if (unlikely(!page_evictable(page)))
1511 goto activate_locked;
1512
1513 if (!sc->may_unmap && page_mapped(page))
1514 goto keep_locked;
1515
1516 /* ->flags can be updated non-atomicially (scan_swap_map_slots),
1517 * but that will never affect SWP_FS_OPS, so the data_race
1518 * is safe.
1519 */
1520 may_enter_fs = (sc->gfp_mask & __GFP_FS) ||
1521 (PageSwapCache(page) &&
> 1522 !data_race(page_swap_info(page)->flags & SWP_FS_OPS) &&
1523 (sc->gfp_mask & __GFP_IO));
1524
1525 /*
1526 * The number of dirty pages determines if a node is marked
1527 * reclaim_congested. kswapd will stall and start writing
1528 * pages if the tail of the LRU is all dirty unqueued pages.
1529 */
1530 page_check_dirty_writeback(page, &dirty, &writeback);
1531 if (dirty || writeback)
1532 stat->nr_dirty++;
1533
1534 if (dirty && !writeback)
1535 stat->nr_unqueued_dirty++;
1536
1537 /*
1538 * Treat this page as congested if the underlying BDI is or if
1539 * pages are cycling through the LRU so quickly that the
1540 * pages marked for immediate reclaim are making it to the
1541 * end of the LRU a second time.
1542 */
1543 mapping = page_mapping(page);
1544 if (((dirty || writeback) && mapping &&
1545 inode_write_congested(mapping->host)) ||
1546 (writeback && PageReclaim(page)))
1547 stat->nr_congested++;
1548
1549 /*
1550 * If a page at the tail of the LRU is under writeback, there
1551 * are three cases to consider.
1552 *
1553 * 1) If reclaim is encountering an excessive number of pages
1554 * under writeback and this page is both under writeback and
1555 * PageReclaim then it indicates that pages are being queued
1556 * for IO but are being recycled through the LRU before the
1557 * IO can complete. Waiting on the page itself risks an
1558 * indefinite stall if it is impossible to writeback the
1559 * page due to IO error or disconnected storage so instead
1560 * note that the LRU is being scanned too quickly and the
1561 * caller can stall after page list has been processed.
1562 *
1563 * 2) Global or new memcg reclaim encounters a page that is
1564 * not marked for immediate reclaim, or the caller does not
1565 * have __GFP_FS (or __GFP_IO if it's simply going to swap,
1566 * not to fs). In this case mark the page for immediate
1567 * reclaim and continue scanning.
1568 *
1569 * Require may_enter_fs because we would wait on fs, which
1570 * may not have submitted IO yet. And the loop driver might
1571 * enter reclaim, and deadlock if it waits on a page for
1572 * which it is needed to do the write (loop masks off
1573 * __GFP_IO|__GFP_FS for this reason); but more thought
1574 * would probably show more reasons.
1575 *
1576 * 3) Legacy memcg encounters a page that is already marked
1577 * PageReclaim. memcg does not have any dirty pages
1578 * throttling so we could easily OOM just because too many
1579 * pages are in writeback and there is nothing else to
1580 * reclaim. Wait for the writeback to complete.
1581 *
1582 * In cases 1) and 2) we activate the pages to get them out of
1583 * the way while we continue scanning for clean pages on the
1584 * inactive list and refilling from the active list. The
1585 * observation here is that waiting for disk writes is more
1586 * expensive than potentially causing reloads down the line.
1587 * Since they're marked for immediate reclaim, they won't put
1588 * memory pressure on the cache working set any longer than it
1589 * takes to write them to disk.
1590 */
1591 if (PageWriteback(page)) {
1592 /* Case 1 above */
1593 if (current_is_kswapd() &&
1594 PageReclaim(page) &&
1595 test_bit(PGDAT_WRITEBACK, &pgdat->flags)) {
1596 stat->nr_immediate++;
1597 goto activate_locked;
1598
1599 /* Case 2 above */
1600 } else if (writeback_throttling_sane(sc) ||
1601 !PageReclaim(page) || !may_enter_fs) {
1602 /*
1603 * This is slightly racy - end_page_writeback()
1604 * might have just cleared PageReclaim, then
1605 * setting PageReclaim here end up interpreted
1606 * as PageReadahead - but that does not matter
1607 * enough to care. What we do want is for this
1608 * page to have PageReclaim set next time memcg
1609 * reclaim reaches the tests above, so it will
1610 * then wait_on_page_writeback() to avoid OOM;
1611 * and it's also appropriate in global reclaim.
1612 */
1613 SetPageReclaim(page);
1614 stat->nr_writeback++;
1615 goto activate_locked;
1616
1617 /* Case 3 above */
1618 } else {
1619 unlock_page(page);
1620 wait_on_page_writeback(page);
1621 /* then go back and try same page again */
1622 list_add_tail(&page->lru, page_list);
1623 continue;
1624 }
1625 }
1626
1627 if (!ignore_references)
1628 references = page_check_references(page, sc);
1629
1630 switch (references) {
1631 case PAGEREF_ACTIVATE:
1632 goto activate_locked;
1633 case PAGEREF_KEEP:
1634 stat->nr_ref_keep += nr_pages;
1635 goto keep_locked;
1636 case PAGEREF_RECLAIM:
1637 case PAGEREF_RECLAIM_CLEAN:
1638 ; /* try to reclaim the page below */
1639 }
1640
1641 /*
1642 * Before reclaiming the page, try to relocate
1643 * its contents to another node.
1644 */
1645 if (do_demote_pass &&
1646 (thp_migration_supported() || !PageTransHuge(page))) {
1647 list_add(&page->lru, &demote_pages);
1648 unlock_page(page);
1649 continue;
1650 }
1651
1652 /*
1653 * Anonymous process memory has backing store?
1654 * Try to allocate it some swap space here.
1655 * Lazyfree page could be freed directly
1656 */
1657 if (PageAnon(page) && PageSwapBacked(page)) {
1658 if (!PageSwapCache(page)) {
1659 if (!(sc->gfp_mask & __GFP_IO))
1660 goto keep_locked;
1661 if (page_maybe_dma_pinned(page))
1662 goto keep_locked;
1663 if (PageTransHuge(page)) {
1664 /* cannot split THP, skip it */
1665 if (!can_split_huge_page(page, NULL))
1666 goto activate_locked;
1667 /*
1668 * Split pages without a PMD map right
1669 * away. Chances are some or all of the
1670 * tail pages can be freed without IO.
1671 */
1672 if (!compound_mapcount(page) &&
1673 split_huge_page_to_list(page,
1674 page_list))
1675 goto activate_locked;
1676 }
1677 if (!add_to_swap(page)) {
1678 if (!PageTransHuge(page))
1679 goto activate_locked_split;
1680 /* Fallback to swap normal pages */
1681 if (split_huge_page_to_list(page,
1682 page_list))
1683 goto activate_locked;
1684 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
1685 count_vm_event(THP_SWPOUT_FALLBACK);
1686 #endif
1687 if (!add_to_swap(page))
1688 goto activate_locked_split;
1689 }
1690
1691 if ((sc->gfp_mask & __GFP_FS) ||
1692 !data_race(page_swap_info(page)->flags & SWP_FS_OPS))
1693 may_enter_fs = true;
1694
1695 /* Adding to swap updated mapping */
1696 mapping = page_mapping(page);
1697 }
1698 } else if (unlikely(PageTransHuge(page))) {
1699 /* Split file THP */
1700 if (split_huge_page_to_list(page, page_list))
1701 goto keep_locked;
1702 }
1703
1704 /*
1705 * THP may get split above, need minus tail pages and update
1706 * nr_pages to avoid accounting tail pages twice.
1707 *
1708 * The tail pages that are added into swap cache successfully
1709 * reach here.
1710 */
1711 if ((nr_pages > 1) && !PageTransHuge(page)) {
1712 sc->nr_scanned -= (nr_pages - 1);
1713 nr_pages = 1;
1714 }
1715
1716 /*
1717 * The page is mapped into the page tables of one or more
1718 * processes. Try to unmap it here.
1719 */
1720 if (page_mapped(page)) {
1721 enum ttu_flags flags = TTU_BATCH_FLUSH;
1722 bool was_swapbacked = PageSwapBacked(page);
1723
1724 if (unlikely(PageTransHuge(page)))
1725 flags |= TTU_SPLIT_HUGE_PMD;
1726
1727 try_to_unmap(page, flags);
1728 if (page_mapped(page)) {
1729 stat->nr_unmap_fail += nr_pages;
1730 if (!was_swapbacked && PageSwapBacked(page))
1731 stat->nr_lazyfree_fail += nr_pages;
1732 goto activate_locked;
1733 }
1734 }
1735
1736 if (PageDirty(page)) {
1737 /*
1738 * Only kswapd can writeback filesystem pages
1739 * to avoid risk of stack overflow. But avoid
1740 * injecting inefficient single-page IO into
1741 * flusher writeback as much as possible: only
1742 * write pages when we've encountered many
1743 * dirty pages, and when we've already scanned
1744 * the rest of the LRU for clean pages and see
1745 * the same dirty pages again (PageReclaim).
1746 */
1747 if (page_is_file_lru(page) &&
1748 (!current_is_kswapd() || !PageReclaim(page) ||
1749 !test_bit(PGDAT_DIRTY, &pgdat->flags))) {
1750 /*
1751 * Immediately reclaim when written back.
1752 * Similar in principal to deactivate_page()
1753 * except we already have the page isolated
1754 * and know it's dirty
1755 */
1756 inc_node_page_state(page, NR_VMSCAN_IMMEDIATE);
1757 SetPageReclaim(page);
1758
1759 goto activate_locked;
1760 }
1761
1762 if (references == PAGEREF_RECLAIM_CLEAN)
1763 goto keep_locked;
1764 if (!may_enter_fs)
1765 goto keep_locked;
1766 if (!sc->may_writepage)
1767 goto keep_locked;
1768
1769 /*
1770 * Page is dirty. Flush the TLB if a writable entry
1771 * potentially exists to avoid CPU writes after IO
1772 * starts and then write it out here.
1773 */
1774 try_to_unmap_flush_dirty();
1775 switch (pageout(page, mapping)) {
1776 case PAGE_KEEP:
1777 goto keep_locked;
1778 case PAGE_ACTIVATE:
1779 goto activate_locked;
1780 case PAGE_SUCCESS:
1781 stat->nr_pageout += thp_nr_pages(page);
1782
1783 if (PageWriteback(page))
1784 goto keep;
1785 if (PageDirty(page))
1786 goto keep;
1787
1788 /*
1789 * A synchronous write - probably a ramdisk. Go
1790 * ahead and try to reclaim the page.
1791 */
1792 if (!trylock_page(page))
1793 goto keep;
1794 if (PageDirty(page) || PageWriteback(page))
1795 goto keep_locked;
1796 mapping = page_mapping(page);
1797 fallthrough;
1798 case PAGE_CLEAN:
1799 ; /* try to free the page below */
1800 }
1801 }
1802
1803 /*
1804 * If the page has buffers, try to free the buffer mappings
1805 * associated with this page. If we succeed we try to free
1806 * the page as well.
1807 *
1808 * We do this even if the page is PageDirty().
1809 * try_to_release_page() does not perform I/O, but it is
1810 * possible for a page to have PageDirty set, but it is actually
1811 * clean (all its buffers are clean). This happens if the
1812 * buffers were written out directly, with submit_bh(). ext3
1813 * will do this, as well as the blockdev mapping.
1814 * try_to_release_page() will discover that cleanness and will
1815 * drop the buffers and mark the page clean - it can be freed.
1816 *
1817 * Rarely, pages can have buffers and no ->mapping. These are
1818 * the pages which were not successfully invalidated in
1819 * truncate_cleanup_page(). We try to drop those buffers here
1820 * and if that worked, and the page is no longer mapped into
1821 * process address space (page_count == 1) it can be freed.
1822 * Otherwise, leave the page on the LRU so it is swappable.
1823 */
1824 if (page_has_private(page)) {
1825 if (!try_to_release_page(page, sc->gfp_mask))
1826 goto activate_locked;
1827 if (!mapping && page_count(page) == 1) {
1828 unlock_page(page);
1829 if (put_page_testzero(page))
1830 goto free_it;
1831 else {
1832 /*
1833 * rare race with speculative reference.
1834 * the speculative reference will free
1835 * this page shortly, so we may
1836 * increment nr_reclaimed here (and
1837 * leave it off the LRU).
1838 */
1839 nr_reclaimed++;
1840 continue;
1841 }
1842 }
1843 }
1844
1845 if (PageAnon(page) && !PageSwapBacked(page)) {
1846 /* follow __remove_mapping for reference */
1847 if (!page_ref_freeze(page, 1))
1848 goto keep_locked;
1849 /*
1850 * The page has only one reference left, which is
1851 * from the isolation. After the caller puts the
1852 * page back on lru and drops the reference, the
1853 * page will be freed anyway. It doesn't matter
1854 * which lru it goes. So we don't bother checking
1855 * PageDirty here.
1856 */
1857 count_vm_event(PGLAZYFREED);
1858 count_memcg_page_event(page, PGLAZYFREED);
1859 } else if (!mapping || !__remove_mapping(mapping, page, true,
1860 sc->target_mem_cgroup))
1861 goto keep_locked;
1862
1863 unlock_page(page);
1864 free_it:
1865 /*
1866 * THP may get swapped out in a whole, need account
1867 * all base pages.
1868 */
1869 nr_reclaimed += nr_pages;
1870
1871 /*
1872 * Is there need to periodically free_page_list? It would
1873 * appear not as the counts should be low
1874 */
1875 if (unlikely(PageTransHuge(page)))
1876 destroy_compound_page(page);
1877 else
1878 list_add(&page->lru, &free_pages);
1879 continue;
1880
1881 activate_locked_split:
1882 /*
1883 * The tail pages that are failed to add into swap cache
1884 * reach here. Fixup nr_scanned and nr_pages.
1885 */
1886 if (nr_pages > 1) {
1887 sc->nr_scanned -= (nr_pages - 1);
1888 nr_pages = 1;
1889 }
1890 activate_locked:
1891 /* Not a candidate for swapping, so reclaim swap space. */
1892 if (PageSwapCache(page) && (mem_cgroup_swap_full(page) ||
1893 PageMlocked(page)))
1894 try_to_free_swap(page);
1895 VM_BUG_ON_PAGE(PageActive(page), page);
1896 if (!PageMlocked(page)) {
1897 int type = page_is_file_lru(page);
1898 SetPageActive(page);
1899 stat->nr_activate[type] += nr_pages;
1900 count_memcg_page_event(page, PGACTIVATE);
1901 }
1902 keep_locked:
1903 unlock_page(page);
1904 keep:
1905 list_add(&page->lru, &ret_pages);
1906 VM_BUG_ON_PAGE(PageLRU(page) || PageUnevictable(page), page);
1907 }
1908 /* 'page_list' is always empty here */
1909
1910 /* Migrate pages selected for demotion */
1911 nr_reclaimed += demote_page_list(&demote_pages, pgdat);
1912 /* Pages that could not be demoted are still in @demote_pages */
1913 if (!list_empty(&demote_pages)) {
1914 /* Pages which failed to demoted go back on @page_list for retry: */
1915 list_splice_init(&demote_pages, page_list);
1916 do_demote_pass = false;
1917 goto retry;
1918 }
1919
1920 pgactivate = stat->nr_activate[0] + stat->nr_activate[1];
1921
1922 mem_cgroup_uncharge_list(&free_pages);
1923 try_to_unmap_flush();
1924 free_unref_page_list(&free_pages);
1925
1926 list_splice(&ret_pages, page_list);
1927 count_vm_events(PGACTIVATE, pgactivate);
1928
1929 return nr_reclaimed;
1930 }
1931

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@xxxxxxxxxxxx

Attachment: .config.gz
Description: application/gzip