[GIT PULL] x86/mm changes for v3.2

From: Ingo Molnar
Date: Wed Oct 26 2011 - 11:37:35 EST


Linus,

Please pull the latest x86-mm-for-linus git tree from:

git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86-mm-for-linus

Thanks,

Ingo

------------------>
Stefano Stabellini (5):
x86, mm: Calculate precisely the memory needed by init_memory_mapping
Revert "x86,xen: introduce x86_init.mapping.pagetable_reserve"
x86, init: Move memblock_x86_reserve_range PGTABLE to find_early_table_space
x86-64, mm: Do not assume head_64.S used 4KB pages when !use_pse
x86_32: Calculate additional memory needed by the fixmap


 arch/x86/include/asm/pgtable_types.h |    1 -
 arch/x86/include/asm/x86_init.h      |   12 ---
 arch/x86/kernel/x86_init.c           |    4 -
 arch/x86/mm/init.c                   |  147 +++++++++++++++++++++++++---------
 arch/x86/xen/mmu.c                   |   15 ----
 5 files changed, 109 insertions(+), 70 deletions(-)
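
As background for the init.c changes below: the series makes
find_early_table_space() size the page-table allocation from the range
actually being mapped, skipping whatever head_*.S has already covered,
instead of always sizing for [0, end). A minimal standalone sketch of
that 4KB-page arithmetic follows; ptes_needed() is a made-up helper for
illustration, not a kernel function, and it assumes x86_64's 8-byte
pte_t and 4KB pages:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1ULL << PAGE_SHIFT)
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

/*
 * Number of 4KB pte pages needed to map [start, end) with 4KB pages,
 * skipping the prefix that early boot code has already mapped
 * (cf. the PFN_PHYS(max_pfn_mapped) checks in the patch).
 */
static uint64_t ptes_needed(uint64_t start, uint64_t end, uint64_t mapped)
{
	uint64_t size = end - start;

	if (start < mapped) {
		if (mapped < end)
			size -= mapped - start;	/* only the unmapped tail */
		else
			size = 0;		/* range already fully mapped */
	}

	/* one 8-byte pte per 4KB page, rounded up to whole pte pages */
	return DIV_ROUND_UP(DIV_ROUND_UP(size, PAGE_SIZE) * 8, PAGE_SIZE);
}

int main(void)
{
	/* a 1GB range of which the first 512MB is already mapped */
	printf("pte pages needed: %llu\n",
	       (unsigned long long)ptes_needed(0, 1ULL << 30, 512ULL << 20));
	return 0;
}

With a 1GB range whose first 512MB is already mapped, this sizes pte
pages for the remaining 512MB only (256 pages rather than 512).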

diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h
index d56187c..7db7723 100644
--- a/arch/x86/include/asm/pgtable_types.h
+++ b/arch/x86/include/asm/pgtable_types.h
@@ -299,7 +299,6 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
/* Install a pte for a particular vaddr in kernel space. */
void set_pte_vaddr(unsigned long vaddr, pte_t pte);

-extern void native_pagetable_reserve(u64 start, u64 end);
#ifdef CONFIG_X86_32
extern void native_pagetable_setup_start(pgd_t *base);
extern void native_pagetable_setup_done(pgd_t *base);
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index d3d8590..643ebf2 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -68,17 +68,6 @@ struct x86_init_oem {
};

/**
- * struct x86_init_mapping - platform specific initial kernel pagetable setup
- * @pagetable_reserve: reserve a range of addresses for kernel pagetable usage
- *
- * For more details on the purpose of this hook, look in
- * init_memory_mapping and the commit that added it.
- */
-struct x86_init_mapping {
- void (*pagetable_reserve)(u64 start, u64 end);
-};
-
-/**
* struct x86_init_paging - platform specific paging functions
* @pagetable_setup_start: platform specific pre paging_init() call
* @pagetable_setup_done: platform specific post paging_init() call
@@ -134,7 +123,6 @@ struct x86_init_ops {
struct x86_init_mpparse mpparse;
struct x86_init_irqs irqs;
struct x86_init_oem oem;
- struct x86_init_mapping mapping;
struct x86_init_paging paging;
struct x86_init_timers timers;
struct x86_init_iommu iommu;
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 6f164bd..6eee082 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -61,10 +61,6 @@ struct x86_init_ops x86_init __initdata = {
.banner = default_banner,
},

- .mapping = {
- .pagetable_reserve = native_pagetable_reserve,
- },
-
.paging = {
.pagetable_setup_start = native_pagetable_setup_start,
.pagetable_setup_done = native_pagetable_setup_done,
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 3032644..a90ccc4 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -28,22 +28,110 @@ int direct_gbpages
#endif
;

-static void __init find_early_table_space(unsigned long end, int use_pse,
- int use_gbpages)
+static unsigned long __init find_early_fixmap_space(void)
{
- unsigned long puds, pmds, ptes, tables, start = 0, good_end = end;
+ unsigned long size = 0;
+#ifdef CONFIG_X86_32
+ int kmap_begin_pmd_idx, kmap_end_pmd_idx;
+ int fixmap_begin_pmd_idx, fixmap_end_pmd_idx;
+ int btmap_begin_pmd_idx;
+
+ fixmap_begin_pmd_idx =
+ __fix_to_virt(__end_of_fixed_addresses - 1) >> PMD_SHIFT;
+ /*
+ * fixmap_end_pmd_idx is the end of the fixmap minus the PMD that
+ * has been defined in the data section by head_32.S (see
+ * initial_pg_fixmap).
+ * Note: This is similar to what early_ioremap_page_table_range_init
+ * does except that the "end" has PMD_SIZE expunged as per previous
+ * comment.
+ */
+ fixmap_end_pmd_idx = (FIXADDR_TOP - 1) >> PMD_SHIFT;
+ btmap_begin_pmd_idx = __fix_to_virt(FIX_BTMAP_BEGIN) >> PMD_SHIFT;
+ kmap_begin_pmd_idx = __fix_to_virt(FIX_KMAP_END) >> PMD_SHIFT;
+ kmap_end_pmd_idx = __fix_to_virt(FIX_KMAP_BEGIN) >> PMD_SHIFT;
+
+ size = fixmap_end_pmd_idx - fixmap_begin_pmd_idx;
+ /*
+ * early_ioremap_init has already allocated a PMD at
+ * btmap_begin_pmd_idx
+ */
+ if (btmap_begin_pmd_idx < fixmap_end_pmd_idx)
+ size--;
+
+#ifdef CONFIG_HIGHMEM
+ /*
+ * see page_table_kmap_check: if the kmap spans multiple PMDs, make
+ * sure the pte pages are allocated contiguously. It might need up
+ * to two additional pte pages to replace the page declared by
+ * head_32.S and the one allocated by early_ioremap_init, if they
+ * are even partially used for the kmap.
+ */
+ if (kmap_begin_pmd_idx != kmap_end_pmd_idx) {
+ if (kmap_end_pmd_idx == fixmap_end_pmd_idx)
+ size++;
+ if (btmap_begin_pmd_idx >= kmap_begin_pmd_idx &&
+ btmap_begin_pmd_idx <= kmap_end_pmd_idx)
+ size++;
+ }
+#endif
+#endif
+ return (size * PMD_SIZE + PAGE_SIZE - 1) >> PAGE_SHIFT;
+}
+
+static void __init find_early_table_space(unsigned long start,
+ unsigned long end, int use_pse, int use_gbpages)
+{
+ unsigned long pmds = 0, ptes = 0, tables = 0, good_end = end,
+ pud_mapped = 0, pmd_mapped = 0, size = end - start;
phys_addr_t base;

- puds = (end + PUD_SIZE - 1) >> PUD_SHIFT;
- tables = roundup(puds * sizeof(pud_t), PAGE_SIZE);
+ pud_mapped = DIV_ROUND_UP(PFN_PHYS(max_pfn_mapped),
+ (PUD_SIZE * PTRS_PER_PUD));
+ pud_mapped *= (PUD_SIZE * PTRS_PER_PUD);
+ pmd_mapped = DIV_ROUND_UP(PFN_PHYS(max_pfn_mapped),
+ (PMD_SIZE * PTRS_PER_PMD));
+ pmd_mapped *= (PMD_SIZE * PTRS_PER_PMD);
+
+ /*
+ * On x86_64 do not limit the size we need to cover with 4KB pages
+ * depending on the initial allocation because head_64.S always uses
+ * 2MB pages.
+ */
+#ifdef CONFIG_X86_32
+ if (start < PFN_PHYS(max_pfn_mapped)) {
+ if (PFN_PHYS(max_pfn_mapped) < end)
+ size -= PFN_PHYS(max_pfn_mapped) - start;
+ else
+ size = 0;
+ }
+#endif
+
+#ifndef __PAGETABLE_PUD_FOLDED
+ if (end > pud_mapped) {
+ unsigned long puds;
+ if (start < pud_mapped)
+ puds = (end - pud_mapped + PUD_SIZE - 1) >> PUD_SHIFT;
+ else
+ puds = (end - start + PUD_SIZE - 1) >> PUD_SHIFT;
+ tables += roundup(puds * sizeof(pud_t), PAGE_SIZE);
+ }
+#endif

if (use_gbpages) {
unsigned long extra;

extra = end - ((end>>PUD_SHIFT) << PUD_SHIFT);
pmds = (extra + PMD_SIZE - 1) >> PMD_SHIFT;
- } else
- pmds = (end + PMD_SIZE - 1) >> PMD_SHIFT;
+ }
+#ifndef __PAGETABLE_PMD_FOLDED
+ else if (end > pmd_mapped) {
+ if (start < pmd_mapped)
+ pmds = (end - pmd_mapped + PMD_SIZE - 1) >> PMD_SHIFT;
+ else
+ pmds = (end - start + PMD_SIZE - 1) >> PMD_SHIFT;
+ }
+#endif

tables += roundup(pmds * sizeof(pmd_t), PAGE_SIZE);

@@ -51,23 +139,22 @@ static void __init find_early_table_space(unsigned long end, int use_pse,
unsigned long extra;

extra = end - ((end>>PMD_SHIFT) << PMD_SHIFT);
-#ifdef CONFIG_X86_32
- extra += PMD_SIZE;
-#endif
ptes = (extra + PAGE_SIZE - 1) >> PAGE_SHIFT;
} else
- ptes = (end + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ ptes = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+ ptes += find_early_fixmap_space();

tables += roundup(ptes * sizeof(pte_t), PAGE_SIZE);

-#ifdef CONFIG_X86_32
- /* for fixmap */
- tables += roundup(__end_of_fixed_addresses * sizeof(pte_t), PAGE_SIZE);
+ if (!tables)
+ return;

+#ifdef CONFIG_X86_32
good_end = max_pfn_mapped << PAGE_SHIFT;
#endif

- base = memblock_find_in_range(start, good_end, tables, PAGE_SIZE);
+ base = memblock_find_in_range(0x00, good_end, tables, PAGE_SIZE);
if (base == MEMBLOCK_ERROR)
panic("Cannot find space for the kernel page tables");

@@ -77,11 +164,10 @@ static void __init find_early_table_space(unsigned long end, int use_pse,

printk(KERN_DEBUG "kernel direct mapping tables up to %lx @ %lx-%lx\n",
end, pgt_buf_start << PAGE_SHIFT, pgt_buf_top << PAGE_SHIFT);
-}

-void __init native_pagetable_reserve(u64 start, u64 end)
-{
- memblock_x86_reserve_range(start, end, "PGTABLE");
+ if (pgt_buf_top > pgt_buf_start)
+ memblock_x86_reserve_range(pgt_buf_start << PAGE_SHIFT,
+ pgt_buf_top << PAGE_SHIFT, "PGTABLE");
}

struct map_range {
@@ -261,7 +347,7 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,
* nodes are discovered.
*/
if (!after_bootmem)
- find_early_table_space(end, use_pse, use_gbpages);
+ find_early_table_space(start, end, use_pse, use_gbpages);

for (i = 0; i < nr_range; i++)
ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
@@ -275,24 +361,9 @@ unsigned long __init_refok init_memory_mapping(unsigned long start,

__flush_tlb_all();

- /*
- * Reserve the kernel pagetable pages we used (pgt_buf_start -
- * pgt_buf_end) and free the other ones (pgt_buf_end - pgt_buf_top)
- * so that they can be reused for other purposes.
- *
- * On native it just means calling memblock_x86_reserve_range, on Xen it
- * also means marking RW the pagetable pages that we allocated before
- * but that haven't been used.
- *
- * In fact on xen we mark RO the whole range pgt_buf_start -
- * pgt_buf_top, because we have to make sure that when
- * init_memory_mapping reaches the pagetable pages area, it maps
- * RO all the pagetable pages, including the ones that are beyond
- * pgt_buf_end at that time.
- */
- if (!after_bootmem && pgt_buf_end > pgt_buf_start)
- x86_init.mapping.pagetable_reserve(PFN_PHYS(pgt_buf_start),
- PFN_PHYS(pgt_buf_end));
+ if (pgt_buf_end != pgt_buf_top)
+ printk(KERN_DEBUG "initial kernel pagetable allocation wasted %lx"
+ " pages\n", pgt_buf_top - pgt_buf_end);

if (!after_bootmem)
early_memtest(start, end);
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index dc708dc..2004f1e 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1153,20 +1153,6 @@ static void __init xen_pagetable_setup_start(pgd_t *base)
{
}

-static __init void xen_mapping_pagetable_reserve(u64 start, u64 end)
-{
- /* reserve the range used */
- native_pagetable_reserve(start, end);
-
- /* set as RW the rest */
- printk(KERN_DEBUG "xen: setting RW the range %llx - %llx\n", end,
- PFN_PHYS(pgt_buf_top));
- while (end < PFN_PHYS(pgt_buf_top)) {
- make_lowmem_page_readwrite(__va(end));
- end += PAGE_SIZE;
- }
-}
-
static void xen_post_allocator_init(void);

static void __init xen_pagetable_setup_done(pgd_t *base)
@@ -1997,7 +1983,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {

void __init xen_init_mmu_ops(void)
{
- x86_init.mapping.pagetable_reserve = xen_mapping_pagetable_reserve;
x86_init.paging.pagetable_setup_start = xen_pagetable_setup_start;
x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done;
pv_mmu_ops = xen_mmu_ops;
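
On the x86_32 side, find_early_fixmap_space() above counts whole PMD
slots spanned by the fixmap and subtracts the ones that already have a
pte page (initial_pg_fixmap from head_32.S, and the slot that
early_ioremap_init() set up). A standalone sketch of that counting,
using made-up addresses in place of the real FIXADDR_TOP /
FIX_BTMAP_BEGIN layout and omitting the CONFIG_HIGHMEM kmap
adjustments:

#include <stdio.h>

#define PMD_SHIFT	21		/* PAE x86_32 */

/* Made-up example addresses standing in for the fixmap layout */
#define FIXADDR_TOP	0xfffff000UL	/* top of the fixmap */
#define FIXMAP_BOTTOM	0xff800000UL	/* virt of the last fixmap index */
#define BTMAP_BEGIN	0xffdf8000UL	/* early_ioremap's boot-time slot */

int main(void)
{
	unsigned long first = FIXMAP_BOTTOM >> PMD_SHIFT;
	unsigned long last = (FIXADDR_TOP - 1) >> PMD_SHIFT;
	/*
	 * Subtracting without +1 drops one slot: the topmost PMD is
	 * already covered by initial_pg_fixmap from head_32.S.
	 */
	unsigned long pte_pages = last - first;

	/* early_ioremap_init() has already allocated the btmap's PMD */
	if ((BTMAP_BEGIN >> PMD_SHIFT) < last)
		pte_pages--;

	printf("extra pte pages for the fixmap: %lu\n", pte_pages);
	return 0;
}

With these example addresses the fixmap spans four PMD slots; head_32.S
covers the top one and early_ioremap_init() another, leaving two extra
pte pages to reserve.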