extern void device_scan(void);
+#define MAX_PHYS_ADDRESS (1UL << 42UL)
+#define KPTE_BITMAP_CHUNK_SZ (256UL * 1024UL * 1024UL)
+#define KPTE_BITMAP_BYTES \
+ ((MAX_PHYS_ADDRESS / KPTE_BITMAP_CHUNK_SZ) / 8)
+
+unsigned long kern_linear_pte_xor[2] __read_mostly;
+
+/* A bitmap, one bit for every 256MB of physical memory. If the bit
+ * is clear, we should use a 4MB page (via kern_linear_pte_xor[0]) else
+ * if set we should use a 256MB page (via kern_linear_pte_xor[1]).
+ */
+unsigned long kpte_linear_bitmap[KPTE_BITMAP_BYTES / sizeof(unsigned long)];
+
+/* A special kernel TSB for 4MB and 256MB linear mappings. */
+struct tsb swapper_4m_tsb[KERNEL_TSB4M_NENTRIES];
+
#define MAX_BANKS 32
static struct linux_prom64_registers pavail[MAX_BANKS] __initdata;
unsigned long *sparc64_valid_addr_bitmap __read_mostly;
-/* Ugly, but necessary... -DaveM */
-unsigned long phys_base __read_mostly;
+/* Kernel physical address base and size in bytes. */
unsigned long kern_base __read_mostly;
unsigned long kern_size __read_mostly;
-unsigned long pfn_base __read_mostly;
-unsigned long kern_linear_pte_xor __read_mostly;
/* get_new_mmu_context() uses "cache + 1". */
DEFINE_SPINLOCK(ctx_alloc_lock);
clear_page(addr);
}
+extern void tsb_cache_init(void);
+
void pgtable_cache_init(void)
{
pgtable_cache = kmem_cache_create("pgtable_cache",
zero_ctor,
NULL);
if (!pgtable_cache) {
- prom_printf("pgtable_cache_init(): Could not create!\n");
+ prom_printf("Could not create pgtable_cache\n");
prom_halt();
}
+ tsb_cache_init();
}
#ifdef CONFIG_DEBUG_DCFLUSH
#endif
#endif
-__inline__ void flush_dcache_page_impl(struct page *page)
+inline void flush_dcache_page_impl(struct page *page)
{
+ BUG_ON(tlb_type == hypervisor);
#ifdef CONFIG_DEBUG_DCFLUSH
atomic_inc(&dcpage_flushes);
#endif
}
#define PG_dcache_dirty PG_arch_1
-#define PG_dcache_cpu_shift 24
-#define PG_dcache_cpu_mask (256 - 1)
+#define PG_dcache_cpu_shift 24UL
+#define PG_dcache_cpu_mask (256UL - 1UL)
#if NR_CPUS > 256
#error D-cache dirty tracking and thread_info->cpu need fixing for > 256 cpus
void update_mmu_cache(struct vm_area_struct *vma, unsigned long address, pte_t pte)
{
struct mm_struct *mm;
- struct page *page;
- unsigned long pfn;
- unsigned long pg_flags;
-
- pfn = pte_pfn(pte);
- if (pfn_valid(pfn) &&
- (page = pfn_to_page(pfn), page_mapping(page)) &&
- ((pg_flags = page->flags) & (1UL << PG_dcache_dirty))) {
- int cpu = ((pg_flags >> PG_dcache_cpu_shift) &
- PG_dcache_cpu_mask);
- int this_cpu = get_cpu();
-
- /* This is just to optimize away some function calls
- * in the SMP case.
- */
- if (cpu == this_cpu)
- flush_dcache_page_impl(page);
- else
- smp_flush_dcache_page_impl(page, cpu);
+ struct tsb *tsb;
+ unsigned long tag, flags;
+ unsigned long tsb_index, tsb_hash_shift;
+
+ if (tlb_type != hypervisor) {
+ unsigned long pfn = pte_pfn(pte);
+ unsigned long pg_flags;
+ struct page *page;
+
+ if (pfn_valid(pfn) &&
+ (page = pfn_to_page(pfn), page_mapping(page)) &&
+ ((pg_flags = page->flags) & (1UL << PG_dcache_dirty))) {
+ int cpu = ((pg_flags >> PG_dcache_cpu_shift) &
+ PG_dcache_cpu_mask);
+ int this_cpu = get_cpu();
+
+ /* This is just to optimize away some function calls
+ * in the SMP case.
+ */
+ if (cpu == this_cpu)
+ flush_dcache_page_impl(page);
+ else
+ smp_flush_dcache_page_impl(page, cpu);
- clear_dcache_dirty_cpu(page, cpu);
+ clear_dcache_dirty_cpu(page, cpu);
- put_cpu();
+ put_cpu();
+ }
}
mm = vma->vm_mm;
- if ((pte_val(pte) & _PAGE_ALL_SZ_BITS) == _PAGE_SZBITS) {
- struct tsb *tsb;
- unsigned long tag;
-
- tsb = &mm->context.tsb[(address >> PAGE_SHIFT) &
- (mm->context.tsb_nentries - 1UL)];
- tag = (address >> 22UL) | CTX_HWBITS(mm->context) << 48UL;
- tsb_insert(tsb, tag, pte_val(pte));
+
+ tsb_index = MM_TSB_BASE;
+ tsb_hash_shift = PAGE_SHIFT;
+
+ spin_lock_irqsave(&mm->context.lock, flags);
+
+#ifdef CONFIG_HUGETLB_PAGE
+ if (mm->context.tsb_block[MM_TSB_HUGE].tsb != NULL) {
+ if ((tlb_type == hypervisor &&
+ (pte_val(pte) & _PAGE_SZALL_4V) == _PAGE_SZHUGE_4V) ||
+ (tlb_type != hypervisor &&
+ (pte_val(pte) & _PAGE_SZALL_4U) == _PAGE_SZHUGE_4U)) {
+ tsb_index = MM_TSB_HUGE;
+ tsb_hash_shift = HPAGE_SHIFT;
+ }
}
+#endif
+
+ tsb = mm->context.tsb_block[tsb_index].tsb;
+ tsb += ((address >> tsb_hash_shift) &
+ (mm->context.tsb_block[tsb_index].tsb_nentries - 1UL));
+ tag = (address >> 22UL);
+ tsb_insert(tsb, tag, pte_val(pte));
+
+ spin_unlock_irqrestore(&mm->context.lock, flags);
}
void flush_dcache_page(struct page *page)
struct address_space *mapping;
int this_cpu;
+ if (tlb_type == hypervisor)
+ return;
+
/* Do not bother with the expensive D-cache flush if it
* is merely the zero page. The 'bigcore' testcase in GDB
* causes this case to run millions of times.
}
}
-unsigned long page_to_pfn(struct page *page)
-{
- return (unsigned long) ((page - mem_map) + pfn_base);
-}
-
-struct page *pfn_to_page(unsigned long pfn)
-{
- return (mem_map + (pfn - pfn_base));
-}
-
void show_mem(void)
{
printk("Mem-info:\n");
"=&r" (arg3)
: "0" (func), "1" (arg0), "2" (arg1),
"3" (arg2), "4" (arg3));
+ if (arg0 != 0) {
+ prom_printf("hypervisor_tlb_lock[%lx:%lx:%lx:%lx]: "
+ "errors with %lx\n", vaddr, 0, pte, mmu, arg0);
+ prom_halt();
+ }
}
static unsigned long kern_large_tte(unsigned long paddr);
* let the user have CTX 0 (nucleus) or we ever use a CTX
* version of zero (and thus NO_CONTEXT would not be caught
* by version mis-match tests in mmu_context.h).
+ *
+ * Always invoked with interrupts disabled.
*/
void get_new_mmu_context(struct mm_struct *mm)
{
unsigned long ctx, new_ctx;
unsigned long orig_pgsz_bits;
-
+ unsigned long flags;
+ int new_version;
- spin_lock(&ctx_alloc_lock);
+ spin_lock_irqsave(&ctx_alloc_lock, flags);
orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK);
ctx = (tlb_context_cache + 1) & CTX_NR_MASK;
new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx);
+ new_version = 0;
if (new_ctx >= (1 << CTX_NR_BITS)) {
new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1);
if (new_ctx >= ctx) {
mmu_context_bmap[i + 2] = 0;
mmu_context_bmap[i + 3] = 0;
}
+ new_version = 1;
goto out;
}
}
out:
tlb_context_cache = new_ctx;
mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits;
- spin_unlock(&ctx_alloc_lock);
+ spin_unlock_irqrestore(&ctx_alloc_lock, flags);
+
+ if (unlikely(new_version))
+ smp_new_mmu_context_version();
}
void sparc_ultra_dump_itlb(void)
extern unsigned long cmdline_memory_size;
-unsigned long __init bootmem_init(unsigned long *pages_avail)
+/* Find a free area for the bootmem map, avoiding the kernel image
+ * and the initial ramdisk.
+ */
+static unsigned long __init choose_bootmap_pfn(unsigned long start_pfn,
+ unsigned long end_pfn)
+{
+ unsigned long avoid_start, avoid_end, bootmap_size;
+ int i;
+
+ bootmap_size = ((end_pfn - start_pfn) + 7) / 8;
+ bootmap_size = ALIGN(bootmap_size, sizeof(long));
+
+ avoid_start = avoid_end = 0;
+#ifdef CONFIG_BLK_DEV_INITRD
+ avoid_start = initrd_start;
+ avoid_end = PAGE_ALIGN(initrd_end);
+#endif
+
+#ifdef CONFIG_DEBUG_BOOTMEM
+ prom_printf("choose_bootmap_pfn: kern[%lx:%lx] avoid[%lx:%lx]\n",
+ kern_base, PAGE_ALIGN(kern_base + kern_size),
+ avoid_start, avoid_end);
+#endif
+ for (i = 0; i < pavail_ents; i++) {
+ unsigned long start, end;
+
+ start = pavail[i].phys_addr;
+ end = start + pavail[i].reg_size;
+
+ while (start < end) {
+ if (start >= kern_base &&
+ start < PAGE_ALIGN(kern_base + kern_size)) {
+ start = PAGE_ALIGN(kern_base + kern_size);
+ continue;
+ }
+ if (start >= avoid_start && start < avoid_end) {
+ start = avoid_end;
+ continue;
+ }
+
+ if ((end - start) < bootmap_size)
+ break;
+
+ if (start < kern_base &&
+ (start + bootmap_size) > kern_base) {
+ start = PAGE_ALIGN(kern_base + kern_size);
+ continue;
+ }
+
+ if (start < avoid_start &&
+ (start + bootmap_size) > avoid_start) {
+ start = avoid_end;
+ continue;
+ }
+
+ /* OK, it doesn't overlap anything, use it. */
+#ifdef CONFIG_DEBUG_BOOTMEM
+ prom_printf("choose_bootmap_pfn: Using %lx [%lx]\n",
+ start >> PAGE_SHIFT, start);
+#endif
+ return start >> PAGE_SHIFT;
+ }
+ }
+
+ prom_printf("Cannot find free area for bootmap, aborting.\n");
+ prom_halt();
+}
+
+static unsigned long __init bootmem_init(unsigned long *pages_avail,
+ unsigned long phys_base)
{
- unsigned long bootmap_size, start_pfn, end_pfn;
+ unsigned long bootmap_size, end_pfn;
unsigned long end_of_phys_memory = 0UL;
unsigned long bootmap_pfn, bytes_avail, size;
int i;
*pages_avail = bytes_avail >> PAGE_SHIFT;
- /* Start with page aligned address of last symbol in kernel
- * image. The kernel is hard mapped below PAGE_OFFSET in a
- * 4MB locked TLB translation.
- */
- start_pfn = PAGE_ALIGN(kern_base + kern_size) >> PAGE_SHIFT;
-
- bootmap_pfn = start_pfn;
-
end_pfn = end_of_phys_memory >> PAGE_SHIFT;
#ifdef CONFIG_BLK_DEV_INITRD
"(0x%016lx > 0x%016lx)\ndisabling initrd\n",
initrd_end, end_of_phys_memory);
initrd_start = 0;
- }
- if (initrd_start) {
- if (initrd_start >= (start_pfn << PAGE_SHIFT) &&
- initrd_start < (start_pfn << PAGE_SHIFT) + 2 * PAGE_SIZE)
- bootmap_pfn = PAGE_ALIGN (initrd_end) >> PAGE_SHIFT;
+ initrd_end = 0;
}
}
#endif
/* Initialize the boot-time allocator. */
max_pfn = max_low_pfn = end_pfn;
- min_low_pfn = pfn_base;
+ min_low_pfn = (phys_base >> PAGE_SHIFT);
+
+ bootmap_pfn = choose_bootmap_pfn(min_low_pfn, end_pfn);
#ifdef CONFIG_DEBUG_BOOTMEM
prom_printf("init_bootmem(min[%lx], bootmap[%lx], max[%lx])\n",
min_low_pfn, bootmap_pfn, max_low_pfn);
#endif
- bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap_pfn, pfn_base, end_pfn);
+ bootmap_size = init_bootmem_node(NODE_DATA(0), bootmap_pfn,
+ min_low_pfn, end_pfn);
/* Now register the available physical memory with the
* allocator.
reserve_bootmem((bootmap_pfn << PAGE_SHIFT), size);
*pages_avail -= PAGE_ALIGN(size) >> PAGE_SHIFT;
+ for (i = 0; i < pavail_ents; i++) {
+ unsigned long start_pfn, end_pfn;
+
+ start_pfn = pavail[i].phys_addr >> PAGE_SHIFT;
+ end_pfn = (start_pfn + (pavail[i].reg_size >> PAGE_SHIFT));
+#ifdef CONFIG_DEBUG_BOOTMEM
+ prom_printf("memory_present(0, %lx, %lx)\n",
+ start_pfn, end_pfn);
+#endif
+ memory_present(0, start_pfn, end_pfn);
+ }
+
+ sparse_init();
+
return end_pfn;
}
+static struct linux_prom64_registers pall[MAX_BANKS] __initdata;
+static int pall_ents __initdata;
+
#ifdef CONFIG_DEBUG_PAGEALLOC
static unsigned long kernel_map_range(unsigned long pstart, unsigned long pend, pgprot_t prot)
{
return alloc_bytes;
}
-static struct linux_prom64_registers pall[MAX_BANKS] __initdata;
-static int pall_ents __initdata;
-
extern unsigned int kvmap_linear_patch[1];
+#endif /* CONFIG_DEBUG_PAGEALLOC */
+
+static void __init mark_kpte_bitmap(unsigned long start, unsigned long end)
+{
+ const unsigned long shift_256MB = 28;
+ const unsigned long mask_256MB = ((1UL << shift_256MB) - 1UL);
+ const unsigned long size_256MB = (1UL << shift_256MB);
+
+ while (start < end) {
+ long remains;
+
+ remains = end - start;
+ if (remains < size_256MB)
+ break;
+
+ if (start & mask_256MB) {
+ start = (start + size_256MB) & ~mask_256MB;
+ continue;
+ }
+
+ while (remains >= size_256MB) {
+ unsigned long index = start >> shift_256MB;
+
+ __set_bit(index, kpte_linear_bitmap);
+
+ start += size_256MB;
+ remains -= size_256MB;
+ }
+ }
+}
static void __init kernel_physical_mapping_init(void)
{
- unsigned long i, mem_alloced = 0UL;
+ unsigned long i;
+#ifdef CONFIG_DEBUG_PAGEALLOC
+ unsigned long mem_alloced = 0UL;
+#endif
read_obp_memory("reg", &pall[0], &pall_ents);
phys_start = pall[i].phys_addr;
phys_end = phys_start + pall[i].reg_size;
+
+ mark_kpte_bitmap(phys_start, phys_end);
+
+#ifdef CONFIG_DEBUG_PAGEALLOC
mem_alloced += kernel_map_range(phys_start, phys_end,
PAGE_KERNEL);
+#endif
}
+#ifdef CONFIG_DEBUG_PAGEALLOC
printk("Allocated %ld bytes for kernel page tables.\n",
mem_alloced);
flushi(&kvmap_linear_patch[0]);
__flush_tlb_all();
+#endif
}
+#ifdef CONFIG_DEBUG_PAGEALLOC
void kernel_map_pages(struct page *page, int numpages, int enable)
{
unsigned long phys_start = page_to_pfn(page) << PAGE_SHIFT;
{
unsigned long ktsb_pa;
+ /* First KTSB for PAGE_SIZE mappings. */
ktsb_pa = kern_base + ((unsigned long)&swapper_tsb[0] - KERNBASE);
switch (PAGE_SIZE) {
ktsb_descr[0].tsb_base = ktsb_pa;
ktsb_descr[0].resv = 0;
- /* XXX When we have a kernel large page size TSB, describe
- * XXX it in ktsb_descr[1] here.
- */
+ /* Second KTSB for 4MB/256MB mappings. */
+ ktsb_pa = (kern_base +
+ ((unsigned long)&swapper_4m_tsb[0] - KERNBASE));
+
+ ktsb_descr[1].pgsz_idx = HV_PGSZ_IDX_4MB;
+ ktsb_descr[1].pgsz_mask = (HV_PGSZ_MASK_4MB |
+ HV_PGSZ_MASK_256MB);
+ ktsb_descr[1].assoc = 1;
+ ktsb_descr[1].num_ttes = KERNEL_TSB4M_NENTRIES;
+ ktsb_descr[1].ctx_idx = 0;
+ ktsb_descr[1].tsb_base = ktsb_pa;
+ ktsb_descr[1].resv = 0;
}
void __cpuinit sun4v_ktsb_register(void)
pa = kern_base + ((unsigned long)&ktsb_descr[0] - KERNBASE);
func = HV_FAST_MMU_TSB_CTX0;
- /* XXX set arg0 to 2 when we use ktsb_descr[1], see above XXX */
- arg0 = 1;
+ arg0 = 2;
arg1 = pa;
__asm__ __volatile__("ta %6"
: "=&r" (func), "=&r" (arg0), "=&r" (arg1)
void __init paging_init(void)
{
- unsigned long end_pfn, pages_avail, shift;
+ unsigned long end_pfn, pages_avail, shift, phys_base;
unsigned long real_end, i;
kern_base = (prom_boot_mapping_phys_low >> 22UL) << 22UL;
kern_size = (unsigned long)&_end - (unsigned long)KERNBASE;
+ /* Invalidate both kernel TSBs. */
+ memset(swapper_tsb, 0x40, sizeof(swapper_tsb));
+ memset(swapper_4m_tsb, 0x40, sizeof(swapper_4m_tsb));
+
if (tlb_type == hypervisor)
sun4v_pgprot_init();
else
for (i = 0; i < pavail_ents; i++)
phys_base = min(phys_base, pavail[i].phys_addr);
- pfn_base = phys_base >> PAGE_SHIFT;
-
set_bit(0, mmu_context_bmap);
shift = kern_base + PAGE_OFFSET - ((unsigned long)KERNBASE);
/* Setup bootmem... */
pages_avail = 0;
- last_valid_pfn = end_pfn = bootmem_init(&pages_avail);
+ last_valid_pfn = end_pfn = bootmem_init(&pages_avail, phys_base);
+
+ max_mapnr = last_valid_pfn;
-#ifdef CONFIG_DEBUG_PAGEALLOC
kernel_physical_mapping_init();
-#endif
{
unsigned long zones_size[MAX_NR_ZONES];
unsigned long zholes_size[MAX_NR_ZONES];
- unsigned long npages;
int znum;
for (znum = 0; znum < MAX_NR_ZONES; znum++)
zones_size[znum] = zholes_size[znum] = 0;
- npages = end_pfn - pfn_base;
- zones_size[ZONE_DMA] = npages;
- zholes_size[ZONE_DMA] = npages - pages_avail;
+ zones_size[ZONE_DMA] = end_pfn;
+ zholes_size[ZONE_DMA] = end_pfn - pages_avail;
free_area_init_node(0, &contig_page_data, zones_size,
- phys_base >> PAGE_SHIFT, zholes_size);
+ __pa(PAGE_OFFSET) >> PAGE_SHIFT,
+ zholes_size);
}
device_scan();
taint_real_pages();
- max_mapnr = last_valid_pfn - pfn_base;
high_memory = __va(last_valid_pfn << PAGE_SHIFT);
#ifdef CONFIG_DEBUG_BOOTMEM
p = virt_to_page(page);
ClearPageReserved(p);
- set_page_count(p, 1);
+ init_page_count(p);
__free_page(p);
num_physpages++;
totalram_pages++;
struct page *p = virt_to_page(start);
ClearPageReserved(p);
- set_page_count(p, 1);
+ init_page_count(p);
__free_page(p);
num_physpages++;
totalram_pages++;
pgprot_t PAGE_KERNEL_LOCKED __read_mostly;
pgprot_t PAGE_COPY __read_mostly;
+
+pgprot_t PAGE_SHARED __read_mostly;
+EXPORT_SYMBOL(PAGE_SHARED);
+
pgprot_t PAGE_EXEC __read_mostly;
unsigned long pg_iobits __read_mostly;
unsigned long _PAGE_IE __read_mostly;
+
unsigned long _PAGE_E __read_mostly;
+EXPORT_SYMBOL(_PAGE_E);
+
unsigned long _PAGE_CACHE __read_mostly;
+EXPORT_SYMBOL(_PAGE_CACHE);
static void prot_init_common(unsigned long page_none,
unsigned long page_shared,
unsigned long page_exec_bit)
{
PAGE_COPY = __pgprot(page_copy);
+ PAGE_SHARED = __pgprot(page_shared);
protection_map[0x0] = __pgprot(page_none);
protection_map[0x1] = __pgprot(page_readonly & ~page_exec_bit);
pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4U | __DIRTY_BITS_4U |
__ACCESS_BITS_4U | _PAGE_E_4U);
- kern_linear_pte_xor = (_PAGE_VALID | _PAGE_SZ4MB_4U) ^
+ kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4U) ^
0xfffff80000000000;
- kern_linear_pte_xor |= (_PAGE_CP_4U | _PAGE_CV_4U |
- _PAGE_P_4U | _PAGE_W_4U);
+ kern_linear_pte_xor[0] |= (_PAGE_CP_4U | _PAGE_CV_4U |
+ _PAGE_P_4U | _PAGE_W_4U);
+
+ /* XXX Should use 256MB on Panther. XXX */
+ kern_linear_pte_xor[1] = kern_linear_pte_xor[0];
_PAGE_SZBITS = _PAGE_SZBITS_4U;
_PAGE_ALL_SZ_BITS = (_PAGE_SZ4MB_4U | _PAGE_SZ512K_4U |
_PAGE_E = _PAGE_E_4V;
_PAGE_CACHE = _PAGE_CACHE_4V;
- kern_linear_pte_xor = (_PAGE_VALID | _PAGE_SZ4MB_4V) ^
+ kern_linear_pte_xor[0] = (_PAGE_VALID | _PAGE_SZ4MB_4V) ^
+ 0xfffff80000000000;
+ kern_linear_pte_xor[0] |= (_PAGE_CP_4V | _PAGE_CV_4V |
+ _PAGE_P_4V | _PAGE_W_4V);
+
+ kern_linear_pte_xor[1] = (_PAGE_VALID | _PAGE_SZ256MB_4V) ^
0xfffff80000000000;
- kern_linear_pte_xor |= (_PAGE_CP_4V | _PAGE_CV_4V |
- _PAGE_P_4V | _PAGE_W_4V);
+ kern_linear_pte_xor[1] |= (_PAGE_CP_4V | _PAGE_CV_4V |
+ _PAGE_P_4V | _PAGE_W_4V);
pg_iobits = (_PAGE_VALID | _PAGE_PRESENT_4V | __DIRTY_BITS_4V |
__ACCESS_BITS_4V | _PAGE_E_4V);
__asm__ __volatile__("wrpr %0, 0, %%pstate"
: : "r" (pstate));
}
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+
+void online_page(struct page *page)
+{
+ ClearPageReserved(page);
+ init_page_count(page);
+ __free_page(page);
+ totalram_pages++;
+ num_physpages++;
+}
+
+int remove_memory(u64 start, u64 size)
+{
+ return -EINVAL;
+}
+
+#endif /* CONFIG_MEMORY_HOTPLUG */