On 26 Apr 21:55, Guillaume Morin wrote:
On 26 Apr 9:19, David Hildenbrand wrote:
A couple of points:
a) Don't use page_mapcount(). Use folio_mapcount() if anything, but
likely you want to check PageAnonExclusive.
b) If you're not following the can_follow_write_pte/_pmd model, you are
doing something wrong :)
c) The code was heavily changed in mm/mm-unstable. It was merged with
the common code.
Likely, in mm/mm-unstable, the existing can_follow_write_pte and
can_follow_write_pmd checks will already cover what you want in most cases.
We'd need a can_follow_write_pud() to cover follow_huge_pud() and
(unfortunately) something similar to handle follow_hugepd() as well.
Copy-pasting what we do in can_follow_write_pte() and adjusting for
different PTE types is the right thing to do. Maybe now it's time to factor
out the common checks into a separate helper.
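Roughly like this, maybe (only a sketch: the helper name and the exact
set of checks below are made up here, not what's in mm/mm-unstable):

static inline bool can_follow_write_common(struct page *page,
		struct vm_area_struct *vma, unsigned int flags)
{
	/* FOLL_FORCE is the only way to override a missing write bit. */
	if (!(flags & FOLL_FORCE))
		return false;

	/* But FOLL_FORCE has no effect on shared mappings ... */
	if (vma->vm_flags & (VM_MAYSHARE | VM_SHARED))
		return false;

	/* ... or read-only private ones ... */
	if (!(vma->vm_flags & VM_MAYWRITE))
		return false;

	/* ... or already writable ones that just need a write fault. */
	if (vma->vm_flags & VM_WRITE)
		return false;

	/*
	 * Writing through a read-only mapping is only safe once COW has
	 * been broken and the anonymous page is exclusively ours.
	 */
	return page && PageAnonExclusive(page);
}

can_follow_write_pte()/_pmd()/_pud() would then keep only the checks
that depend on the entry type (pte_write(), soft-dirty, uffd-wp).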
I tried to get the hugepd stuff right, but this was the first I'd heard
of it :-) Afaict, follow_huge_pmd() and friends were already DTRT.

I got it working on top of your uprobes-cow branch with the FOLL_FORCE
patch sent on Friday. It is still pretty lightly tested.
I went with a single write-opcode function with some additional
branches. I went back and forth between that and making it two separate
functions.
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 2f4e88552d3f..8a33e380f7ea 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -83,6 +83,10 @@ static const struct fs_parameter_spec hugetlb_fs_parameters[] = {
{}
};
+bool hugetlbfs_mapping(struct address_space *mapping)
+{
+	return mapping->a_ops == &hugetlbfs_aops;
+}
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
-static void copy_from_page(struct page *page, unsigned long vaddr, void *dst, int len)
+static void copy_from_page(struct page *page, unsigned long vaddr, void *dst, int len, unsigned long page_mask)
{
void *kaddr = kmap_atomic(page);
- memcpy(dst, kaddr + (vaddr & ~PAGE_MASK), len);
+ memcpy(dst, kaddr + (vaddr & ~page_mask), len);
kunmap_atomic(kaddr);
}
-static void copy_to_page(struct page *page, unsigned long vaddr, const void *src, int len)
+static void copy_to_page(struct page *page, unsigned long vaddr, const void *src, int len, unsigned long page_mask)
{
void *kaddr = kmap_atomic(page);
- memcpy(kaddr + (vaddr & ~PAGE_MASK), src, len);
+ memcpy(kaddr + (vaddr & ~page_mask), src, len);
kunmap_atomic(kaddr);
}
-static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t *new_opcode)
+static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t *new_opcode, unsigned long page_mask)
{
uprobe_opcode_t old_opcode;
bool is_swbp;
@@ -191,7 +192,8 @@ static int verify_opcode(struct page *page, unsigned long vaddr, uprobe_opcode_t
* is a trap variant; uprobes always wins over any other (gdb)
* breakpoint.
*/
- copy_from_page(page, vaddr, &old_opcode, UPROBE_SWBP_INSN_SIZE);
+ copy_from_page(page, vaddr, &old_opcode, UPROBE_SWBP_INSN_SIZE,
+ page_mask);
is_swbp = is_swbp_insn(&old_opcode);
if (is_swbp_insn(new_opcode)) {
@@ -376,8 +378,8 @@ struct uwo_data {
uprobe_opcode_t opcode;
};
-static int __write_opcode_pte(pte_t *ptep, unsigned long vaddr,
- unsigned long next, struct mm_walk *walk)
+static int __write_opcode(pte_t *ptep, unsigned long vaddr,
+ unsigned long page_mask, struct mm_walk *walk)
{
struct uwo_data *data = walk->private;
const bool is_register = !!is_swbp_insn(&data->opcode);
@@ -415,9 +417,12 @@ static int __write_opcode_pte(pte_t *ptep, unsigned long vaddr,
/* Unmap + flush the TLB, such that we can write atomically. */
flush_cache_page(vma, vaddr, pte_pfn(pte));
- pte = ptep_clear_flush(vma, vaddr, ptep);
+ if (folio_test_hugetlb(folio))
+ pte = huge_ptep_clear_flush(vma, vaddr, ptep);
+ else
+ pte = ptep_clear_flush(vma, vaddr, ptep);
copy_to_page(page, data->opcode_vaddr, &data->opcode,
- UPROBE_SWBP_INSN_SIZE);
+ UPROBE_SWBP_INSN_SIZE, page_mask);
/* When unregistering, we may only zap a PTE if uffd is disabled ... */
if (is_register || userfaultfd_missing(vma))
@@ -443,13 +448,18 @@ static int __write_opcode_pte(pte_t *ptep, unsigned long vaddr,
if (!identical || folio_maybe_dma_pinned(folio))
goto remap;
- /* Zap it and try to reclaim swap space. */
- dec_mm_counter(mm, MM_ANONPAGES);
- folio_remove_rmap_pte(folio, page, vma);
- if (!folio_mapped(folio) && folio_test_swapcache(folio) &&
- folio_trylock(folio)) {
- folio_free_swap(folio);
- folio_unlock(folio);
+ if (folio_test_hugetlb(folio)) {
+ hugetlb_remove_rmap(folio);
+ large = false;
+ } else {
+ /* Zap it and try to reclaim swap space. */
+ dec_mm_counter(mm, MM_ANONPAGES);
+ folio_remove_rmap_pte(folio, page, vma);
+ if (!folio_mapped(folio) && folio_test_swapcache(folio) &&
+ folio_trylock(folio)) {
+ folio_free_swap(folio);
+ folio_unlock(folio);
+ }
}
folio_put(folio);
@@ -461,11 +471,29 @@ static int __write_opcode_pte(pte_t *ptep, unsigned long vaddr,
*/
smp_wmb();
/* We modified the page. Make sure to mark the PTE dirty. */
- set_pte_at(mm, vaddr, ptep, pte_mkdirty(pte));
+ if (folio_test_hugetlb(folio))
+ set_huge_pte_at(mm, vaddr, ptep, huge_pte_mkdirty(pte),
+ (~page_mask) + 1);
+ else
+ set_pte_at(mm, vaddr, ptep, pte_mkdirty(pte));
return UWO_DONE;
}
+static int __write_opcode_hugetlb(pte_t *ptep, unsigned long hmask,
+ unsigned long vaddr,
+ unsigned long next, struct mm_walk *walk)
+{
+ return __write_opcode(ptep, vaddr, hmask, walk);
+}
+
+static int __write_opcode_pte(pte_t *ptep, unsigned long vaddr,
+ unsigned long next, struct mm_walk *walk)
+{
+ return __write_opcode(ptep, vaddr, PAGE_MASK, walk);
+}
+
static const struct mm_walk_ops write_opcode_ops = {
+ .hugetlb_entry = __write_opcode_hugetlb,
.pte_entry = __write_opcode_pte,
.walk_lock = PGWALK_WRLOCK,
};
@@ -492,7 +520,7 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct vm_area_struct *vma,
unsigned long opcode_vaddr, uprobe_opcode_t opcode)
{
struct uprobe *uprobe = container_of(auprobe, struct uprobe, arch);
- const unsigned long vaddr = opcode_vaddr & PAGE_MASK;
+ unsigned long vaddr = opcode_vaddr & PAGE_MASK;
const bool is_register = !!is_swbp_insn(&opcode);
struct uwo_data data = {
.opcode = opcode,
@@ -503,6 +531,7 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct vm_area_struct *vma,
struct mmu_notifier_range range;
int ret, ref_ctr_updated = 0;
struct page *page;
+ unsigned long page_size = PAGE_SIZE;
if (WARN_ON_ONCE(!is_cow_mapping(vma->vm_flags)))
return -EINVAL;
@@ -521,7 +550,14 @@ int uprobe_write_opcode(struct arch_uprobe *auprobe, struct vm_area_struct *vma,
if (ret != 1)
goto out;
- ret = verify_opcode(page, opcode_vaddr, &opcode);
+
+ if (is_vm_hugetlb_page(vma)) {
+ struct hstate *h = hstate_vma(vma);
+ page_size = huge_page_size(h);
+ vaddr &= huge_page_mask(h);
+ page = compound_head(page);
+ }
+ ret = verify_opcode(page, opcode_vaddr, &opcode, ~(page_size - 1));
put_page(page);
if (ret <= 0)
goto out;
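
For clarity, the page_mask convention above mirrors PAGE_MASK: the mask
is ~(page_size - 1), so "vaddr & ~page_mask" is the byte offset into
the backing page that copy_from_page()/copy_to_page() need. A userspace
toy to illustrate the arithmetic (address and sizes are made-up example
values):

#include <stdio.h>

int main(void)
{
	unsigned long vaddr = 0x7f1234612345UL;     /* example address */
	unsigned long base_size = 4096UL;           /* 4 KiB base page */
	unsigned long huge_size = 2UL << 20;        /* 2 MiB hugetlb page */

	unsigned long base_mask = ~(base_size - 1); /* PAGE_MASK */
	unsigned long huge_mask = ~(huge_size - 1); /* huge_page_mask(h) */

	/* Offsets that copy_{from,to}_page() add to the mapped page. */
	printf("base offset: 0x%lx\n", vaddr & ~base_mask); /* 0x345 */
	printf("huge offset: 0x%lx\n", vaddr & ~huge_mask); /* 0x12345 */
	return 0;
}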