summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- include/linux/mm_types.h 33
-rw-r--r-- mm/huge_memory.c 20
-rw-r--r-- mm/migrate.c 6
3 files changed, 47 insertions, 12 deletions
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 7f384bb62d8e..36ea3cf7d85e 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -531,23 +531,44 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm)
*/
static inline bool mm_tlb_flush_pending(struct mm_struct *mm)
{
- barrier();
+ /*
+ * Must be called with PTL held; such that our PTL acquire will have
+ * observed the store from set_tlb_flush_pending().
+ */
return mm->tlb_flush_pending;
}
static inline void set_tlb_flush_pending(struct mm_struct *mm)
{
mm->tlb_flush_pending = true;
-
/*
- * Guarantee that the tlb_flush_pending store does not leak into the
- * critical section updating the page tables
+ * The only time this value is relevant is when there are indeed pages
+ * to flush. And we'll only flush pages after changing them, which
+ * requires the PTL.
+ *
+ * So the ordering here is:
+ *
+ * mm->tlb_flush_pending = true;
+ * spin_lock(&ptl);
+ * ...
+ * set_pte_at();
+ * spin_unlock(&ptl);
+ *
+ * spin_lock(&ptl)
+ * mm_tlb_flush_pending();
+ * ....
+ * spin_unlock(&ptl);
+ *
+ * flush_tlb_range();
+ * mm->tlb_flush_pending = false;
+ *
+ * So the =true store is constrained by the PTL unlock, and the =false
+ * store is constrained by the TLB invalidate.
*/
- smp_mb__before_spinlock();
}
/* Clearing is done after a TLB flush, which also provides a barrier. */
static inline void clear_tlb_flush_pending(struct mm_struct *mm)
{
- barrier();
+ /* see set_tlb_flush_pending */
mm->tlb_flush_pending = false;
}
#else
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 86975dec0ba1..c76a720b936b 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -1410,6 +1410,7 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
unsigned long haddr = vmf->address & HPAGE_PMD_MASK;
int page_nid = -1, this_nid = numa_node_id();
int target_nid, last_cpupid = -1;
+ bool need_flush = false;
bool page_locked;
bool migrated = false;
bool was_writable;
@@ -1496,10 +1497,29 @@ int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
}
/*
+ * Since we took the NUMA fault, we must have observed the !accessible
+ * bit. Make sure all other CPUs agree with that, to avoid them
+ * modifying the page we're about to migrate.
+ *
+ * Must be done under PTL such that we'll observe the relevant
+ * set_tlb_flush_pending().
+ */
+ if (mm_tlb_flush_pending(vma->vm_mm))
+ need_flush = true;
+
+ /*
* Migrate the THP to the requested node, returns with page unlocked
* and access rights restored.
*/
spin_unlock(vmf->ptl);
+
+ /*
+ * We are not sure a pending tlb flush here is for a huge page
+ * mapping or not. Hence use the tlb range variant
+ */
+ if (need_flush)
+ flush_tlb_range(vma, haddr, haddr + HPAGE_PMD_SIZE);
+
migrated = migrate_misplaced_transhuge_page(vma->vm_mm, vma,
vmf->pmd, pmd, vmf->address, page, target_nid);
if (migrated) {
diff --git a/mm/migrate.c b/mm/migrate.c
index 627671551873..d68a41da6abb 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1937,12 +1937,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
put_page(new_page);
goto out_fail;
}
- /*
- * We are not sure a pending tlb flush here is for a huge page
- * mapping or not. Hence use the tlb range variant
- */
- if (mm_tlb_flush_pending(mm))
- flush_tlb_range(vma, mmun_start, mmun_end);
/* Prepare a page as a migration target */
__SetPageLocked(new_page);