sparc32: Move cache and TLB flushes over to method ops.
[sfrench/cifs-2.6.git] / arch / sparc / mm / srmmu.c
1 /*
2  * srmmu.c:  SRMMU specific routines for memory management.
3  *
4  * Copyright (C) 1995 David S. Miller  (davem@caip.rutgers.edu)
5  * Copyright (C) 1995,2002 Pete Zaitcev (zaitcev@yahoo.com)
6  * Copyright (C) 1996 Eddie C. Dost    (ecd@skynet.be)
7  * Copyright (C) 1997,1998 Jakub Jelinek (jj@sunsite.mff.cuni.cz)
8  * Copyright (C) 1999,2000 Anton Blanchard (anton@samba.org)
9  */
10
11 #include <linux/kernel.h>
12 #include <linux/mm.h>
13 #include <linux/vmalloc.h>
14 #include <linux/pagemap.h>
15 #include <linux/init.h>
16 #include <linux/spinlock.h>
17 #include <linux/bootmem.h>
18 #include <linux/fs.h>
19 #include <linux/seq_file.h>
20 #include <linux/kdebug.h>
21 #include <linux/log2.h>
22 #include <linux/gfp.h>
23
24 #include <asm/bitext.h>
25 #include <asm/page.h>
26 #include <asm/pgalloc.h>
27 #include <asm/pgtable.h>
28 #include <asm/io.h>
29 #include <asm/vaddrs.h>
30 #include <asm/traps.h>
31 #include <asm/smp.h>
32 #include <asm/mbus.h>
33 #include <asm/cache.h>
34 #include <asm/oplib.h>
35 #include <asm/asi.h>
36 #include <asm/msi.h>
37 #include <asm/mmu_context.h>
38 #include <asm/io-unit.h>
39 #include <asm/cacheflush.h>
40 #include <asm/tlbflush.h>
41
42 /* Now the cpu specific definitions. */
43 #include <asm/viking.h>
44 #include <asm/mxcc.h>
45 #include <asm/ross.h>
46 #include <asm/tsunami.h>
47 #include <asm/swift.h>
48 #include <asm/turbosparc.h>
49 #include <asm/leon.h>
50
51 #include <asm/btfixup.h>
52
/* File-scope state for the SRMMU memory-management code. */
53 enum mbus_module srmmu_modtype;
54 static unsigned int hwbug_bitmask;
55 int vac_cache_size;
56 int vac_line_size;
57
/*
 * MMU context bookkeeping.  ctx_list_pool is indexed by context number
 * (see free_context()); ctx_free and ctx_used are the list heads that
 * alloc_context() takes entries from.
 */
58 struct ctx_list *ctx_list_pool;
59 struct ctx_list ctx_free;
60 struct ctx_list ctx_used;
61
62 extern struct resource sparc_iomap;
63
64 extern unsigned long last_valid_pfn;
65
/* Allocated in srmmu_nocache_init() and installed there as init_mm.pgd. */
66 static pgd_t *srmmu_swapper_pg_dir;
67
/* NOTE(review): presumably the active cache/TLB flush method table; it is assigned outside this chunk. */
68 const struct sparc32_cachetlb_ops *sparc32_cachetlb_ops;
69
/*
 * FLUSH_BEGIN(mm)/FLUSH_END bracket the body of a per-mm flush routine.
 * With CONFIG_SMP both expand to nothing.  Otherwise FLUSH_BEGIN opens
 * "if ((mm)->context != NO_CONTEXT) {" and FLUSH_END supplies the closing
 * brace, so the body is skipped for an mm without a context.  They must
 * always be used as a pair.
 */
70 #ifdef CONFIG_SMP
71 const struct sparc32_cachetlb_ops *local_ops;
72
73 #define FLUSH_BEGIN(mm)
74 #define FLUSH_END
75 #else
76 #define FLUSH_BEGIN(mm) if ((mm)->context != NO_CONTEXT) {
77 #define FLUSH_END       }
78 #endif
79
80 int flush_page_for_dma_global = 1;
81
82 char *srmmu_name;
83
84 ctxd_t *srmmu_ctx_table_phys;
/* Indexed by mm->context in switch_mm() to install an mm's page directory. */
85 static ctxd_t *srmmu_context_table;
86
87 int viking_mxcc_present;
/* Held by switch_mm() around alloc_context(). */
88 static DEFINE_SPINLOCK(srmmu_context_spinlock);
89
/* When nonzero, switch_mm() flushes the whole icache on every switch. */
90 static int is_hypersparc;
91
/* When nonzero, srmmu_nocache_init() maps the nocache pool with SRMMU_CACHE set. */
92 static int srmmu_cache_pagetables;
93
94 /* these will be initialized in srmmu_nocache_calcsize() */
95 static unsigned long srmmu_nocache_size;
96 static unsigned long srmmu_nocache_end;
97
98 /* 1 bit <=> 256 bytes of nocache <=> 64 PTEs */
/* i.e. one bitmap bit covers (1 << SRMMU_NOCACHE_BITMAP_SHIFT) bytes; the macro is a shift count, not a size. */
99 #define SRMMU_NOCACHE_BITMAP_SHIFT (PAGE_SHIFT - 4)
100
101 /* The context table is a nocache user with the biggest alignment needs. */
102 #define SRMMU_NOCACHE_ALIGN_MAX (sizeof(ctxd_t)*SRMMU_MAX_CONTEXTS)
103
/* Pool memory, its allocation bitmap storage, and the bit_map wrapper; all set up in srmmu_nocache_init(). */
104 void *srmmu_nocache_pool;
105 void *srmmu_nocache_bitmap;
106 static struct bit_map srmmu_nocache_map;
107
108 static inline int srmmu_pte_none(pte_t pte)
109 { return !(pte_val(pte) & 0xFFFFFFF); }
110
111 static inline int srmmu_pmd_none(pmd_t pmd)
112 { return !(pmd_val(pmd) & 0xFFFFFFF); }
113
114 static inline pte_t srmmu_pte_wrprotect(pte_t pte)
115 { return __pte(pte_val(pte) & ~SRMMU_WRITE);}
116
117 static inline pte_t srmmu_pte_mkclean(pte_t pte)
118 { return __pte(pte_val(pte) & ~SRMMU_DIRTY);}
119
120 static inline pte_t srmmu_pte_mkold(pte_t pte)
121 { return __pte(pte_val(pte) & ~SRMMU_REF);}
122
123 /* XXX should we hyper_flush_whole_icache here - Anton */
124 static inline void srmmu_ctxd_set(ctxd_t *ctxp, pgd_t *pgdp)
125 { set_pte((pte_t *)ctxp, (SRMMU_ET_PTD | (__nocache_pa((unsigned long) pgdp) >> 4))); }
126
127 void pmd_set(pmd_t *pmdp, pte_t *ptep)
128 {
129         unsigned long ptp;      /* Physical address, shifted right by 4 */
130         int i;
131
132         ptp = __nocache_pa((unsigned long) ptep) >> 4;
133         for (i = 0; i < PTRS_PER_PTE/SRMMU_REAL_PTRS_PER_PTE; i++) {
134                 set_pte((pte_t *)&pmdp->pmdv[i], SRMMU_ET_PTD | ptp);
135                 ptp += (SRMMU_REAL_PTRS_PER_PTE*sizeof(pte_t) >> 4);
136         }
137 }
138
139 void pmd_populate(struct mm_struct *mm, pmd_t *pmdp, struct page *ptep)
140 {
141         unsigned long ptp;      /* Physical address, shifted right by 4 */
142         int i;
143
144         ptp = page_to_pfn(ptep) << (PAGE_SHIFT-4);      /* watch for overflow */
145         for (i = 0; i < PTRS_PER_PTE/SRMMU_REAL_PTRS_PER_PTE; i++) {
146                 set_pte((pte_t *)&pmdp->pmdv[i], SRMMU_ET_PTD | ptp);
147                 ptp += (SRMMU_REAL_PTRS_PER_PTE*sizeof(pte_t) >> 4);
148         }
149 }
150
151 static inline pte_t srmmu_pte_modify(pte_t pte, pgprot_t newprot)
152 { return __pte((pte_val(pte) & SRMMU_CHG_MASK) | pgprot_val(newprot)); }
153
154 /* to find an entry in a top-level page table... */
155 static inline pgd_t *srmmu_pgd_offset(struct mm_struct * mm, unsigned long address)
156 { return mm->pgd + (address >> SRMMU_PGDIR_SHIFT); }
157
158 /* Find an entry in the third-level page table.. */ 
159 pte_t *pte_offset_kernel(pmd_t * dir, unsigned long address)
160 {
161         void *pte;
162
163         pte = __nocache_va((dir->pmdv[0] & SRMMU_PTD_PMASK) << 4);
164         return (pte_t *) pte +
165             ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1));
166 }
167
168 /*
169  * size: bytes to allocate in the nocache area.
170  * align: bytes, number to align at.
171  * Returns the virtual address of the allocated area.
172  */
173 static unsigned long __srmmu_get_nocache(int size, int align)
174 {
175         int offset;
176
177         if (size < SRMMU_NOCACHE_BITMAP_SHIFT) {
178                 printk("Size 0x%x too small for nocache request\n", size);
179                 size = SRMMU_NOCACHE_BITMAP_SHIFT;
180         }
181         if (size & (SRMMU_NOCACHE_BITMAP_SHIFT-1)) {
182                 printk("Size 0x%x unaligned int nocache request\n", size);
183                 size += SRMMU_NOCACHE_BITMAP_SHIFT-1;
184         }
185         BUG_ON(align > SRMMU_NOCACHE_ALIGN_MAX);
186
187         offset = bit_map_string_get(&srmmu_nocache_map,
188                                         size >> SRMMU_NOCACHE_BITMAP_SHIFT,
189                                         align >> SRMMU_NOCACHE_BITMAP_SHIFT);
190         if (offset == -1) {
191                 printk("srmmu: out of nocache %d: %d/%d\n",
192                     size, (int) srmmu_nocache_size,
193                     srmmu_nocache_map.used << SRMMU_NOCACHE_BITMAP_SHIFT);
194                 return 0;
195         }
196
197         return (SRMMU_NOCACHE_VADDR + (offset << SRMMU_NOCACHE_BITMAP_SHIFT));
198 }
199
/*
 * Zeroing front end to __srmmu_get_nocache(): same arguments and return
 * value, but a successful allocation has its first size bytes cleared.
 */
unsigned long srmmu_get_nocache(int size, int align)
{
        unsigned long vaddr = __srmmu_get_nocache(size, align);

        if (vaddr != 0)
                memset((void *)vaddr, 0, size);
        return vaddr;
}
211
212 void srmmu_free_nocache(unsigned long vaddr, int size)
213 {
214         int offset;
215
216         if (vaddr < SRMMU_NOCACHE_VADDR) {
217                 printk("Vaddr %lx is smaller than nocache base 0x%lx\n",
218                     vaddr, (unsigned long)SRMMU_NOCACHE_VADDR);
219                 BUG();
220         }
221         if (vaddr+size > srmmu_nocache_end) {
222                 printk("Vaddr %lx is bigger than nocache end 0x%lx\n",
223                     vaddr, srmmu_nocache_end);
224                 BUG();
225         }
226         if (!is_power_of_2(size)) {
227                 printk("Size 0x%x is not a power of 2\n", size);
228                 BUG();
229         }
230         if (size < SRMMU_NOCACHE_BITMAP_SHIFT) {
231                 printk("Size 0x%x is too small\n", size);
232                 BUG();
233         }
234         if (vaddr & (size-1)) {
235                 printk("Vaddr %lx is not aligned to size 0x%x\n", vaddr, size);
236                 BUG();
237         }
238
239         offset = (vaddr - SRMMU_NOCACHE_VADDR) >> SRMMU_NOCACHE_BITMAP_SHIFT;
240         size = size >> SRMMU_NOCACHE_BITMAP_SHIFT;
241
242         bit_map_clear(&srmmu_nocache_map, offset, size);
243 }
244
245 static void srmmu_early_allocate_ptable_skeleton(unsigned long start,
246                                                  unsigned long end);
247
248 extern unsigned long probe_memory(void);        /* in fault.c */
249
250 /*
251  * Reserve nocache dynamically proportionally to the amount of
252  * system RAM. -- Tomas Szepe <szepe@pinerecords.com>, June 2002
253  */
254 static void srmmu_nocache_calcsize(void)
255 {
256         unsigned long sysmemavail = probe_memory() / 1024;
257         int srmmu_nocache_npages;
258
259         srmmu_nocache_npages =
260                 sysmemavail / SRMMU_NOCACHE_ALCRATIO / 1024 * 256;
261
262  /* P3 XXX The 4x overuse: corroborated by /proc/meminfo. */
263         // if (srmmu_nocache_npages < 256) srmmu_nocache_npages = 256;
264         if (srmmu_nocache_npages < SRMMU_MIN_NOCACHE_PAGES)
265                 srmmu_nocache_npages = SRMMU_MIN_NOCACHE_PAGES;
266
267         /* anything above 1280 blows up */
268         if (srmmu_nocache_npages > SRMMU_MAX_NOCACHE_PAGES)
269                 srmmu_nocache_npages = SRMMU_MAX_NOCACHE_PAGES;
270
271         srmmu_nocache_size = srmmu_nocache_npages * PAGE_SIZE;
272         srmmu_nocache_end = SRMMU_NOCACHE_VADDR + srmmu_nocache_size;
273 }
274
/*
 * Boot-time setup of the nocache pool: allocate the pool and its bitmap
 * from bootmem, carve the kernel page directory out of the pool, build
 * page tables for the nocache virtual range and map that range onto the
 * pool's physical pages.  Relies on srmmu_nocache_size/srmmu_nocache_end
 * having been set (see srmmu_nocache_calcsize()).
 */
275 static void __init srmmu_nocache_init(void)
276 {
277         unsigned int bitmap_bits;
278         pgd_t *pgd;
279         pmd_t *pmd;
280         pte_t *pte;
281         unsigned long paddr, vaddr;
282         unsigned long pteval;
283
        /* One bitmap bit per (1 << SRMMU_NOCACHE_BITMAP_SHIFT) bytes of pool. */
284         bitmap_bits = srmmu_nocache_size >> SRMMU_NOCACHE_BITMAP_SHIFT;
285
286         srmmu_nocache_pool = __alloc_bootmem(srmmu_nocache_size,
287                 SRMMU_NOCACHE_ALIGN_MAX, 0UL);
288         memset(srmmu_nocache_pool, 0, srmmu_nocache_size);
289
        /* bitmap_bits >> 3 converts the bit count into bytes of bitmap storage. */
290         srmmu_nocache_bitmap = __alloc_bootmem(bitmap_bits >> 3, SMP_CACHE_BYTES, 0UL);
291         bit_map_init(&srmmu_nocache_map, srmmu_nocache_bitmap, bitmap_bits);
292
        /*
         * NOTE(review): accesses below go through __nocache_fix(), presumably
         * because the nocache virtual range is only mapped by the loop further
         * down -- confirm against the __nocache_fix() definition.
         */
293         srmmu_swapper_pg_dir = (pgd_t *)__srmmu_get_nocache(SRMMU_PGD_TABLE_SIZE, SRMMU_PGD_TABLE_SIZE);
294         memset(__nocache_fix(srmmu_swapper_pg_dir), 0, SRMMU_PGD_TABLE_SIZE);
295         init_mm.pgd = srmmu_swapper_pg_dir;
296
297         srmmu_early_allocate_ptable_skeleton(SRMMU_NOCACHE_VADDR, srmmu_nocache_end);
298
299         paddr = __pa((unsigned long)srmmu_nocache_pool);
300         vaddr = SRMMU_NOCACHE_VADDR;
301
        /* Map the nocache virtual range page by page onto the pool's physical pages. */
302         while (vaddr < srmmu_nocache_end) {
303                 pgd = pgd_offset_k(vaddr);
304                 pmd = pmd_offset(__nocache_fix(pgd), vaddr);
305                 pte = pte_offset_kernel(__nocache_fix(pmd), vaddr);
306
                /* Privileged PTE carrying the physical address >> 4. */
307                 pteval = ((paddr >> 4) | SRMMU_ET_PTE | SRMMU_PRIV);
308
                /* The mapping is cacheable only when srmmu_cache_pagetables is set. */
309                 if (srmmu_cache_pagetables)
310                         pteval |= SRMMU_CACHE;
311
312                 set_pte(__nocache_fix(pte), __pte(pteval));
313
314                 vaddr += PAGE_SIZE;
315                 paddr += PAGE_SIZE;
316         }
317
318         flush_cache_all();
319         flush_tlb_all();
320 }
321
322 pgd_t *get_pgd_fast(void)
323 {
324         pgd_t *pgd = NULL;
325
326         pgd = (pgd_t *)__srmmu_get_nocache(SRMMU_PGD_TABLE_SIZE, SRMMU_PGD_TABLE_SIZE);
327         if (pgd) {
328                 pgd_t *init = pgd_offset_k(0);
329                 memset(pgd, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
330                 memcpy(pgd + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD,
331                                                 (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
332         }
333
334         return pgd;
335 }
336
337 /*
338  * Hardware needs alignment to 256 only, but we align to whole page size
339  * to reduce fragmentation problems due to the buddy principle.
340  * XXX Provide actual fragmentation statistics in /proc.
341  *
342  * Alignments up to the page size are the same for physical and virtual
343  * addresses of the nocache area.
344  */
345 pgtable_t pte_alloc_one(struct mm_struct *mm, unsigned long address)
346 {
347         unsigned long pte;
348         struct page *page;
349
350         if ((pte = (unsigned long)pte_alloc_one_kernel(mm, address)) == 0)
351                 return NULL;
352         page = pfn_to_page( __nocache_pa(pte) >> PAGE_SHIFT );
353         pgtable_page_ctor(page);
354         return page;
355 }
356
357 void pte_free(struct mm_struct *mm, pgtable_t pte)
358 {
359         unsigned long p;
360
361         pgtable_page_dtor(pte);
362         p = (unsigned long)page_address(pte);   /* Cached address (for test) */
363         if (p == 0)
364                 BUG();
365         p = page_to_pfn(pte) << PAGE_SHIFT;     /* Physical address */
366         p = (unsigned long) __nocache_va(p);    /* Nocached virtual */
367         srmmu_free_nocache(p, PTE_SIZE);
368 }
369
370 /*
371  */
372 static inline void alloc_context(struct mm_struct *old_mm, struct mm_struct *mm)
373 {
374         struct ctx_list *ctxp;
375
376         ctxp = ctx_free.next;
377         if(ctxp != &ctx_free) {
378                 remove_from_ctx_list(ctxp);
379                 add_to_used_ctxlist(ctxp);
380                 mm->context = ctxp->ctx_number;
381                 ctxp->ctx_mm = mm;
382                 return;
383         }
384         ctxp = ctx_used.next;
385         if(ctxp->ctx_mm == old_mm)
386                 ctxp = ctxp->next;
387         if(ctxp == &ctx_used)
388                 panic("out of mmu contexts");
389         flush_cache_mm(ctxp->ctx_mm);
390         flush_tlb_mm(ctxp->ctx_mm);
391         remove_from_ctx_list(ctxp);
392         add_to_used_ctxlist(ctxp);
393         ctxp->ctx_mm->context = NO_CONTEXT;
394         ctxp->ctx_mm = mm;
395         mm->context = ctxp->ctx_number;
396 }
397
398 static inline void free_context(int context)
399 {
400         struct ctx_list *ctx_old;
401
402         ctx_old = ctx_list_pool + context;
403         remove_from_ctx_list(ctx_old);
404         add_to_free_ctxlist(ctx_old);
405 }
406
407
408 void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm,
409                struct task_struct *tsk)
410 {
411         if(mm->context == NO_CONTEXT) {
412                 spin_lock(&srmmu_context_spinlock);
413                 alloc_context(old_mm, mm);
414                 spin_unlock(&srmmu_context_spinlock);
415                 srmmu_ctxd_set(&srmmu_context_table[mm->context], mm->pgd);
416         }
417
418         if (sparc_cpu_model == sparc_leon)
419                 leon_switch_mm();
420
421         if (is_hypersparc)
422                 hyper_flush_whole_icache();
423
424         srmmu_set_context(mm->context);
425 }
426
427 /* Low level IO area allocation on the SRMMU. */
428 static inline void srmmu_mapioaddr(unsigned long physaddr,
429     unsigned long virt_addr, int bus_type)
430 {
431         pgd_t *pgdp;
432         pmd_t *pmdp;
433         pte_t *ptep;
434         unsigned long tmp;
435
436         physaddr &= PAGE_MASK;
437         pgdp = pgd_offset_k(virt_addr);
438         pmdp = pmd_offset(pgdp, virt_addr);
439         ptep = pte_offset_kernel(pmdp, virt_addr);
440         tmp = (physaddr >> 4) | SRMMU_ET_PTE;
441
442         /*
443          * I need to test whether this is consistent over all
444          * sun4m's.  The bus_type represents the upper 4 bits of
445          * 36-bit physical address on the I/O space lines...
446          */
447         tmp |= (bus_type << 28);
448         tmp |= SRMMU_PRIV;
449         __flush_page_to_ram(virt_addr);
450         set_pte(ptep, __pte(tmp));
451 }
452
453 void srmmu_mapiorange(unsigned int bus, unsigned long xpa,
454                       unsigned long xva, unsigned int len)
455 {
456         while (len != 0) {
457                 len -= PAGE_SIZE;
458                 srmmu_mapioaddr(xpa, xva, bus);
459                 xva += PAGE_SIZE;
460                 xpa += PAGE_SIZE;
461         }
462         flush_tlb_all();
463 }
464
465 static inline void srmmu_unmapioaddr(unsigned long virt_addr)
466 {
467         pgd_t *pgdp;
468         pmd_t *pmdp;
469         pte_t *ptep;
470
471         pgdp = pgd_offset_k(virt_addr);
472         pmdp = pmd_offset(pgdp, virt_addr);
473         ptep = pte_offset_kernel(pmdp, virt_addr);
474
475         /* No need to flush uncacheable page. */
476         __pte_clear(ptep);
477 }
478
479 void srmmu_unmapiorange(unsigned long virt_addr, unsigned int len)
480 {
481         while (len != 0) {
482                 len -= PAGE_SIZE;
483                 srmmu_unmapioaddr(virt_addr);
484                 virt_addr += PAGE_SIZE;
485         }
486         flush_tlb_all();
487 }
488
489 /*
490  * On the SRMMU we do not have the problems with limited tlb entries
491  * for mapping kernel pages, so we just take things from the free page
492  * pool.  As a side effect we are putting a little too much pressure
493  * on the gfp() subsystem.  This setup also makes the logic of the
494  * iommu mapping code a lot easier as we can transparently handle
495  * mappings on the kernel stack without any special code.
496  */
497 struct thread_info *alloc_thread_info_node(struct task_struct *tsk, int node)
498 {
499         struct thread_info *ret;
500
501         ret = (struct thread_info *)__get_free_pages(GFP_KERNEL,
502                                                      THREAD_INFO_ORDER);
503 #ifdef CONFIG_DEBUG_STACK_USAGE
504         if (ret)
505                 memset(ret, 0, PAGE_SIZE << THREAD_INFO_ORDER);
506 #endif /* DEBUG_STACK_USAGE */
507
508         return ret;
509 }
510
511 void free_thread_info(struct thread_info *ti)
512 {
513         free_pages((unsigned long)ti, THREAD_INFO_ORDER);
514 }
515
516 /* tsunami.S */
517 extern void tsunami_flush_cache_all(void);
518 extern void tsunami_flush_cache_mm(struct mm_struct *mm);
519 extern void tsunami_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
520 extern void tsunami_flush_cache_page(struct vm_area_struct *vma, unsigned long page);
521 extern void tsunami_flush_page_to_ram(unsigned long page);
522 extern void tsunami_flush_page_for_dma(unsigned long page);
523 extern void tsunami_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr);
524 extern void tsunami_flush_tlb_all(void);
525 extern void tsunami_flush_tlb_mm(struct mm_struct *mm);
526 extern void tsunami_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end);
527 extern void tsunami_flush_tlb_page(struct vm_area_struct *vma, unsigned long page);
528 extern void tsunami_setup_blockops(void);
529
530 /* swift.S */
531 extern void swift_flush_cache_all(void);
532 extern void swift_flush_cache_mm(struct mm_struct *mm);
533 extern void swift_flush_cache_range(struct vm_area_struct *vma,
534                                     unsigned long start, unsigned long end);
535 extern void swift_flush_cache_page(struct vm_area_struct *vma, unsigned long page);
536 extern void swift_flush_page_to_ram(unsigned long page);
537 extern void swift_flush_page_for_dma(unsigned long page);
538 extern void swift_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr);
539 extern void swift_flush_tlb_all(void);
540 extern void swift_flush_tlb_mm(struct mm_struct *mm);
541 extern void swift_flush_tlb_range(struct vm_area_struct *vma,
542                                   unsigned long start, unsigned long end);
543 extern void swift_flush_tlb_page(struct vm_area_struct *vma, unsigned long page);
544
/*
 * Compiled out by "#if 0"; the live swift_flush_tlb_page() is the extern
 * declared above.  This debugging variant flushes one page for vma's mm
 * and logs (printk) whenever that mm's context differs from the context
 * currently loaded in the MMU, switching contexts around the flush in
 * that case.  Nothing is done when the mm's context is -1.
 */
545 #if 0  /* P3: deadwood to debug precise flushes on Swift. */
546 void swift_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
547 {
548         int cctx, ctx1;
549
550         page &= PAGE_MASK;
551         if ((ctx1 = vma->vm_mm->context) != -1) {
552                 cctx = srmmu_get_context();
553 /* Is context # ever different from current context? P3 */
554                 if (cctx != ctx1) {
555                         printk("flush ctx %02x curr %02x\n", ctx1, cctx);
556                         srmmu_set_context(ctx1);
557                         swift_flush_page(page);
558                         __asm__ __volatile__("sta %%g0, [%0] %1\n\t" : :
559                                         "r" (page), "i" (ASI_M_FLUSH_PROBE));
560                         srmmu_set_context(cctx);
561                 } else {
562                          /* Rm. prot. bits from virt. c. */
563                         /* swift_flush_cache_all(); */
564                         /* swift_flush_cache_page(vma, page); */
565                         swift_flush_page(page);
566
567                         __asm__ __volatile__("sta %%g0, [%0] %1\n\t" : :
568                                 "r" (page), "i" (ASI_M_FLUSH_PROBE));
569                         /* same as above: srmmu_flush_tlb_page() */
570                 }
571         }
572 }
573 #endif
574
575 /*
576  * The following are all MBUS based SRMMU modules, and therefore could
577  * be found in a multiprocessor configuration.  On the whole, these
578  * chips seems to be much more touchy about DVMA and page tables
579  * with respect to cache coherency.
580  */
581
582 /* Cypress flushes. */
/*
 * Flush the whole cache.  After flushing user register windows, every
 * 0x20-byte step of the range 0..0x10000 has its tag read through
 * ASI_M_DATAC_TAG (at faddr + 0x40000); for each tag with both 0x60 bits
 * set a load from 0xf0020000 + faddr is issued.
 */
583 static void cypress_flush_cache_all(void)
584 {
        /* NOTE(review): volatile sink, presumably so the load below is not optimised away. */
585         volatile unsigned long cypress_sucks;
586         unsigned long faddr, tagval;
587
588         flush_user_windows();
589         for(faddr = 0; faddr < 0x10000; faddr += 0x20) {
590                 __asm__ __volatile__("lda [%1 + %2] %3, %0\n\t" :
591                                      "=r" (tagval) :
592                                      "r" (faddr), "r" (0x40000),
593                                      "i" (ASI_M_DATAC_TAG));
594
595                 /* If modified and valid, kick it. */
596                 if((tagval & 0x60) == 0x60)
597                         cypress_sucks = *(unsigned long *)(0xf0020000 + faddr);
598         }
599 }
600
/*
 * Flush the cache for one mm.  With interrupts disabled the MMU is
 * switched to mm's context, a store through ASI_M_FLUSH_CTX is issued at
 * every 0x20-byte offset of a 0x10000-byte span (eight stores per
 * 0x100-byte step, counting down to 0), and the previous context is
 * restored.  FLUSH_BEGIN/FLUSH_END skip the body on non-SMP builds when
 * mm has no context.
 */
601 static void cypress_flush_cache_mm(struct mm_struct *mm)
602 {
603         register unsigned long a, b, c, d, e, f, g;
604         unsigned long flags, faddr;
605         int octx;
606
607         FLUSH_BEGIN(mm)
608         flush_user_windows();
609         local_irq_save(flags);
610         octx = srmmu_get_context();
611         srmmu_set_context(mm->context);
        /* Offsets of the seven further lines inside each 0x100-byte step. */
612         a = 0x20; b = 0x40; c = 0x60;
613         d = 0x80; e = 0xa0; f = 0xc0; g = 0xe0;
614
615         faddr = (0x10000 - 0x100);
        /* Jump into the loop body so the first step is flushed before any decrement. */
616         goto inside;
617         do {
618                 faddr -= 0x100;
619         inside:
620                 __asm__ __volatile__("sta %%g0, [%0] %1\n\t"
621                                      "sta %%g0, [%0 + %2] %1\n\t"
622                                      "sta %%g0, [%0 + %3] %1\n\t"
623                                      "sta %%g0, [%0 + %4] %1\n\t"
624                                      "sta %%g0, [%0 + %5] %1\n\t"
625                                      "sta %%g0, [%0 + %6] %1\n\t"
626                                      "sta %%g0, [%0 + %7] %1\n\t"
627                                      "sta %%g0, [%0 + %8] %1\n\t" : :
628                                      "r" (faddr), "i" (ASI_M_FLUSH_CTX),
629                                      "r" (a), "r" (b), "r" (c), "r" (d),
630                                      "r" (e), "r" (f), "r" (g));
631         } while(faddr);
632         srmmu_set_context(octx);
633         local_irq_restore(flags);
634         FLUSH_END
635 }
636
/*
 * Flush the cache for [start, end) of vma's mm.  start is rounded down
 * with SRMMU_REAL_PMD_MASK and the range is walked in
 * SRMMU_REAL_PMD_SIZE steps; for each step, stores through
 * ASI_M_FLUSH_SEG are issued at every 0x20-byte offset of the 0x10000
 * bytes beginning at that step, from the top down.  Runs in mm's context
 * with interrupts disabled; FLUSH_BEGIN/FLUSH_END skip the body on
 * non-SMP builds when mm has no context.
 */
637 static void cypress_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
638 {
639         struct mm_struct *mm = vma->vm_mm;
640         register unsigned long a, b, c, d, e, f, g;
641         unsigned long flags, faddr;
642         int octx;
643
644         FLUSH_BEGIN(mm)
645         flush_user_windows();
646         local_irq_save(flags);
647         octx = srmmu_get_context();
648         srmmu_set_context(mm->context);
649         a = 0x20; b = 0x40; c = 0x60;
650         d = 0x80; e = 0xa0; f = 0xc0; g = 0xe0;
651
652         start &= SRMMU_REAL_PMD_MASK;
653         while(start < end) {
654                 faddr = (start + (0x10000 - 0x100));
                /* Enter the loop body directly so the topmost step is flushed first. */
655                 goto inside;
656                 do {
657                         faddr -= 0x100;
658                 inside:
659                         __asm__ __volatile__("sta %%g0, [%0] %1\n\t"
660                                              "sta %%g0, [%0 + %2] %1\n\t"
661                                              "sta %%g0, [%0 + %3] %1\n\t"
662                                              "sta %%g0, [%0 + %4] %1\n\t"
663                                              "sta %%g0, [%0 + %5] %1\n\t"
664                                              "sta %%g0, [%0 + %6] %1\n\t"
665                                              "sta %%g0, [%0 + %7] %1\n\t"
666                                              "sta %%g0, [%0 + %8] %1\n\t" : :
667                                              "r" (faddr),
668                                              "i" (ASI_M_FLUSH_SEG),
669                                              "r" (a), "r" (b), "r" (c), "r" (d),
670                                              "r" (e), "r" (f), "r" (g));
671                 } while (faddr != start);
672                 start += SRMMU_REAL_PMD_SIZE;
673         }
674         srmmu_set_context(octx);
675         local_irq_restore(flags);
676         FLUSH_END
677 }
678
/*
 * Flush the cache for one page of vma's mm.  The address is rounded down
 * to a page boundary and stores through ASI_M_FLUSH_PAGE are issued at
 * every 0x20-byte offset of the page, from its last 0x100-byte step down
 * to its first.  Runs in mm's context with interrupts disabled;
 * FLUSH_BEGIN/FLUSH_END skip the body on non-SMP builds when mm has no
 * context.
 */
679 static void cypress_flush_cache_page(struct vm_area_struct *vma, unsigned long page)
680 {
681         register unsigned long a, b, c, d, e, f, g;
682         struct mm_struct *mm = vma->vm_mm;
683         unsigned long flags, line;
684         int octx;
685
686         FLUSH_BEGIN(mm)
687         flush_user_windows();
688         local_irq_save(flags);
689         octx = srmmu_get_context();
690         srmmu_set_context(mm->context);
691         a = 0x20; b = 0x40; c = 0x60;
692         d = 0x80; e = 0xa0; f = 0xc0; g = 0xe0;
693
694         page &= PAGE_MASK;
695         line = (page + PAGE_SIZE) - 0x100;
        /* Enter the loop body directly so the last step of the page is flushed first. */
696         goto inside;
697         do {
698                 line -= 0x100;
699         inside:
700                         __asm__ __volatile__("sta %%g0, [%0] %1\n\t"
701                                              "sta %%g0, [%0 + %2] %1\n\t"
702                                              "sta %%g0, [%0 + %3] %1\n\t"
703                                              "sta %%g0, [%0 + %4] %1\n\t"
704                                              "sta %%g0, [%0 + %5] %1\n\t"
705                                              "sta %%g0, [%0 + %6] %1\n\t"
706                                              "sta %%g0, [%0 + %7] %1\n\t"
707                                              "sta %%g0, [%0 + %8] %1\n\t" : :
708                                              "r" (line),
709                                              "i" (ASI_M_FLUSH_PAGE),
710                                              "r" (a), "r" (b), "r" (c), "r" (d),
711                                              "r" (e), "r" (f), "r" (g));
712         } while(line != page);
713         srmmu_set_context(octx);
714         local_irq_restore(flags);
715         FLUSH_END
716 }
717
718 /* Cypress is copy-back, at least that is how we configure it. */
/*
 * Flush one page with ASI_M_FLUSH_PAGE stores at every 0x20-byte offset,
 * walking from the page's last 0x100-byte step down to its first.
 * Unlike cypress_flush_cache_page() this neither switches MMU context
 * nor disables interrupts.
 */
719 static void cypress_flush_page_to_ram(unsigned long page)
720 {
721         register unsigned long a, b, c, d, e, f, g;
722         unsigned long line;
723
724         a = 0x20; b = 0x40; c = 0x60; d = 0x80; e = 0xa0; f = 0xc0; g = 0xe0;
725         page &= PAGE_MASK;
726         line = (page + PAGE_SIZE) - 0x100;
        /* Enter the loop body directly so the last step of the page is flushed first. */
727         goto inside;
728         do {
729                 line -= 0x100;
730         inside:
731                 __asm__ __volatile__("sta %%g0, [%0] %1\n\t"
732                                      "sta %%g0, [%0 + %2] %1\n\t"
733                                      "sta %%g0, [%0 + %3] %1\n\t"
734                                      "sta %%g0, [%0 + %4] %1\n\t"
735                                      "sta %%g0, [%0 + %5] %1\n\t"
736                                      "sta %%g0, [%0 + %6] %1\n\t"
737                                      "sta %%g0, [%0 + %7] %1\n\t"
738                                      "sta %%g0, [%0 + %8] %1\n\t" : :
739                                      "r" (line),
740                                      "i" (ASI_M_FLUSH_PAGE),
741                                      "r" (a), "r" (b), "r" (c), "r" (d),
742                                      "r" (e), "r" (f), "r" (g));
743         } while(line != page);
744 }
745
746 /* Cypress is also IO cache coherent. */
/* Intentionally empty: no per-page flush is performed before DMA on Cypress. */
747 static void cypress_flush_page_for_dma(unsigned long page)
748 {
749 }
750
751 /* Cypress has unified L2 VIPT, from which both instructions and data
752  * are stored.  It does not have an onboard icache of any sort, therefore
753  * no flush is necessary.
754  */
/* Intentionally empty; both arguments are ignored. */
755 static void cypress_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr)
756 {
757 }
758
/* Flush every TLB entry by delegating to srmmu_flush_whole_tlb(). */
759 static void cypress_flush_tlb_all(void)
760 {
761         srmmu_flush_whole_tlb();
762 }
763
/*
 * Flush the TLB entries of one mm.  The asm saves the context register
 * in %g5, loads mm->context into it, issues a store to address 0x300
 * through ASI_M_FLUSH_PROBE and restores the saved context.
 * NOTE(review): 0x300 presumably selects a context-wide flush in the
 * SRMMU flush address encoding -- confirm against the SRMMU spec.
 * FLUSH_BEGIN/FLUSH_END skip the body on non-SMP builds when mm has no
 * context.
 */
764 static void cypress_flush_tlb_mm(struct mm_struct *mm)
765 {
766         FLUSH_BEGIN(mm)
767         __asm__ __volatile__(
768         "lda    [%0] %3, %%g5\n\t"
769         "sta    %2, [%0] %3\n\t"
770         "sta    %%g0, [%1] %4\n\t"
771         "sta    %%g5, [%0] %3\n"
772         : /* no outputs */
773         : "r" (SRMMU_CTX_REG), "r" (0x300), "r" (mm->context),
774           "i" (ASI_M_MMUREGS), "i" (ASI_M_FLUSH_PROBE)
775         : "g5");
776         FLUSH_END
777 }
778
/*
 * Flush the TLB entries of vma's mm for [start, end).  start is rounded
 * down with SRMMU_PGDIR_MASK and end rounded up with SRMMU_PGDIR_ALIGN.
 * With mm's context loaded, the asm loop steps the remaining size down
 * by SRMMU_PGDIR_SIZE and issues one ASI_M_FLUSH_PROBE store per step at
 * (start | 0x200) + remaining size; the store sits in the branch delay
 * slot, so it also executes on the final iteration (offset 0).  The
 * saved context is restored afterwards.
 * NOTE(review): 0x200 presumably selects the flush granularity in the
 * SRMMU flush address encoding -- confirm against the SRMMU spec.
 * NOTE(review): operand %3 (size) is written by "subcc" although it is
 * declared as an input-only operand; GCC's extended-asm rules expect a
 * modified operand to be an output ("+r") -- confirm and fix separately.
 * FLUSH_BEGIN/FLUSH_END skip the body on non-SMP builds when mm has no
 * context.
 */
779 static void cypress_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
780 {
781         struct mm_struct *mm = vma->vm_mm;
782         unsigned long size;
783
784         FLUSH_BEGIN(mm)
785         start &= SRMMU_PGDIR_MASK;
786         size = SRMMU_PGDIR_ALIGN(end) - start;
787         __asm__ __volatile__(
788                 "lda    [%0] %5, %%g5\n\t"
789                 "sta    %1, [%0] %5\n"
790                 "1:\n\t"
791                 "subcc  %3, %4, %3\n\t"
792                 "bne    1b\n\t"
793                 " sta   %%g0, [%2 + %3] %6\n\t"
794                 "sta    %%g5, [%0] %5\n"
795         : /* no outputs */
796         : "r" (SRMMU_CTX_REG), "r" (mm->context), "r" (start | 0x200),
797           "r" (size), "r" (SRMMU_PGDIR_SIZE), "i" (ASI_M_MMUREGS),
798           "i" (ASI_M_FLUSH_PROBE)
799         : "g5", "cc");
800         FLUSH_END
801 }
802
/*
 * Flush the TLB entry for one page of vma's mm.  The asm saves the
 * context register in %g5, loads mm->context, issues a store to the
 * page-aligned address through ASI_M_FLUSH_PROBE and restores the saved
 * context.  FLUSH_BEGIN/FLUSH_END skip the body on non-SMP builds when
 * mm has no context.
 */
803 static void cypress_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
804 {
805         struct mm_struct *mm = vma->vm_mm;
806
807         FLUSH_BEGIN(mm)
808         __asm__ __volatile__(
809         "lda    [%0] %3, %%g5\n\t"
810         "sta    %1, [%0] %3\n\t"
811         "sta    %%g0, [%2] %4\n\t"
812         "sta    %%g5, [%0] %3\n"
813         : /* no outputs */
814         : "r" (SRMMU_CTX_REG), "r" (mm->context), "r" (page & PAGE_MASK),
815           "i" (ASI_M_MMUREGS), "i" (ASI_M_FLUSH_PROBE)
816         : "g5");
817         FLUSH_END
818 }
819
/* viking.S: cache and TLB flush entry points */
extern void viking_flush_cache_all(void);
extern void viking_flush_cache_mm(struct mm_struct *mm);
extern void viking_flush_cache_range(struct vm_area_struct *vma,
				     unsigned long start, unsigned long end);
extern void viking_flush_cache_page(struct vm_area_struct *vma,
				    unsigned long page);
extern void viking_flush_page_to_ram(unsigned long page);
extern void viking_flush_page_for_dma(unsigned long page);
extern void viking_flush_sig_insns(struct mm_struct *mm, unsigned long addr);
extern void viking_flush_page(unsigned long page);
extern void viking_mxcc_flush_page(unsigned long page);
extern void viking_flush_tlb_all(void);
extern void viking_flush_tlb_mm(struct mm_struct *mm);
extern void viking_flush_tlb_range(struct vm_area_struct *vma,
				   unsigned long start, unsigned long end);
extern void viking_flush_tlb_page(struct vm_area_struct *vma,
				  unsigned long page);
extern void sun4dsmp_flush_tlb_all(void);
extern void sun4dsmp_flush_tlb_mm(struct mm_struct *mm);
extern void sun4dsmp_flush_tlb_range(struct vm_area_struct *vma,
				     unsigned long start, unsigned long end);
extern void sun4dsmp_flush_tlb_page(struct vm_area_struct *vma,
				    unsigned long page);
843
/* hypersparc.S: cache and TLB flush entry points, plus block-op setup */
extern void hypersparc_flush_cache_all(void);
extern void hypersparc_flush_cache_mm(struct mm_struct *mm);
extern void hypersparc_flush_cache_range(struct vm_area_struct *vma,
					 unsigned long start, unsigned long end);
extern void hypersparc_flush_cache_page(struct vm_area_struct *vma,
					unsigned long page);
extern void hypersparc_flush_page_to_ram(unsigned long page);
extern void hypersparc_flush_page_for_dma(unsigned long page);
extern void hypersparc_flush_sig_insns(struct mm_struct *mm,
				       unsigned long insn_addr);
extern void hypersparc_flush_tlb_all(void);
extern void hypersparc_flush_tlb_mm(struct mm_struct *mm);
extern void hypersparc_flush_tlb_range(struct vm_area_struct *vma,
				       unsigned long start, unsigned long end);
extern void hypersparc_flush_tlb_page(struct vm_area_struct *vma,
				      unsigned long page);
extern void hypersparc_setup_blockops(void);
857
/*
 * NOTE: All of this startup code assumes that the low 16MB (approx.) of
 *       kernel mappings is backed by one single contiguous chunk of
 *       RAM.  On small-RAM machines (mainly Classics) only around
 *       8MB is mapped for us.
 */
864
865 static void __init early_pgtable_allocfail(char *type)
866 {
867         prom_printf("inherit_prom_mappings: Cannot alloc kernel %s.\n", type);
868         prom_halt();
869 }
870
871 static void __init srmmu_early_allocate_ptable_skeleton(unsigned long start,
872                                                         unsigned long end)
873 {
874         pgd_t *pgdp;
875         pmd_t *pmdp;
876         pte_t *ptep;
877
878         while(start < end) {
879                 pgdp = pgd_offset_k(start);
880                 if (pgd_none(*(pgd_t *)__nocache_fix(pgdp))) {
881                         pmdp = (pmd_t *) __srmmu_get_nocache(
882                             SRMMU_PMD_TABLE_SIZE, SRMMU_PMD_TABLE_SIZE);
883                         if (pmdp == NULL)
884                                 early_pgtable_allocfail("pmd");
885                         memset(__nocache_fix(pmdp), 0, SRMMU_PMD_TABLE_SIZE);
886                         pgd_set(__nocache_fix(pgdp), pmdp);
887                 }
888                 pmdp = pmd_offset(__nocache_fix(pgdp), start);
889                 if(srmmu_pmd_none(*(pmd_t *)__nocache_fix(pmdp))) {
890                         ptep = (pte_t *)__srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
891                         if (ptep == NULL)
892                                 early_pgtable_allocfail("pte");
893                         memset(__nocache_fix(ptep), 0, PTE_SIZE);
894                         pmd_set(__nocache_fix(pmdp), ptep);
895                 }
896                 if (start > (0xffffffffUL - PMD_SIZE))
897                         break;
898                 start = (start + PMD_SIZE) & PMD_MASK;
899         }
900 }
901
902 static void __init srmmu_allocate_ptable_skeleton(unsigned long start,
903                                                   unsigned long end)
904 {
905         pgd_t *pgdp;
906         pmd_t *pmdp;
907         pte_t *ptep;
908
909         while(start < end) {
910                 pgdp = pgd_offset_k(start);
911                 if (pgd_none(*pgdp)) {
912                         pmdp = (pmd_t *)__srmmu_get_nocache(SRMMU_PMD_TABLE_SIZE, SRMMU_PMD_TABLE_SIZE);
913                         if (pmdp == NULL)
914                                 early_pgtable_allocfail("pmd");
915                         memset(pmdp, 0, SRMMU_PMD_TABLE_SIZE);
916                         pgd_set(pgdp, pmdp);
917                 }
918                 pmdp = pmd_offset(pgdp, start);
919                 if(srmmu_pmd_none(*pmdp)) {
920                         ptep = (pte_t *) __srmmu_get_nocache(PTE_SIZE,
921                                                              PTE_SIZE);
922                         if (ptep == NULL)
923                                 early_pgtable_allocfail("pte");
924                         memset(ptep, 0, PTE_SIZE);
925                         pmd_set(pmdp, ptep);
926                 }
927                 if (start > (0xffffffffUL - PMD_SIZE))
928                         break;
929                 start = (start + PMD_SIZE) & PMD_MASK;
930         }
931 }
932
933 /*
934  * This is much cleaner than poking around physical address space
935  * looking at the prom's page table directly which is what most
936  * other OS's do.  Yuck... this is much better.
937  */
938 static void __init srmmu_inherit_prom_mappings(unsigned long start,
939                                                unsigned long end)
940 {
941         pgd_t *pgdp;
942         pmd_t *pmdp;
943         pte_t *ptep;
944         int what = 0; /* 0 = normal-pte, 1 = pmd-level pte, 2 = pgd-level pte */
945         unsigned long prompte;
946
947         while(start <= end) {
948                 if (start == 0)
949                         break; /* probably wrap around */
950                 if(start == 0xfef00000)
951                         start = KADB_DEBUGGER_BEGVM;
952                 if(!(prompte = srmmu_hwprobe(start))) {
953                         start += PAGE_SIZE;
954                         continue;
955                 }
956     
957                 /* A red snapper, see what it really is. */
958                 what = 0;
959     
960                 if(!(start & ~(SRMMU_REAL_PMD_MASK))) {
961                         if(srmmu_hwprobe((start-PAGE_SIZE) + SRMMU_REAL_PMD_SIZE) == prompte)
962                                 what = 1;
963                 }
964     
965                 if(!(start & ~(SRMMU_PGDIR_MASK))) {
966                         if(srmmu_hwprobe((start-PAGE_SIZE) + SRMMU_PGDIR_SIZE) ==
967                            prompte)
968                                 what = 2;
969                 }
970     
971                 pgdp = pgd_offset_k(start);
972                 if(what == 2) {
973                         *(pgd_t *)__nocache_fix(pgdp) = __pgd(prompte);
974                         start += SRMMU_PGDIR_SIZE;
975                         continue;
976                 }
977                 if (pgd_none(*(pgd_t *)__nocache_fix(pgdp))) {
978                         pmdp = (pmd_t *)__srmmu_get_nocache(SRMMU_PMD_TABLE_SIZE, SRMMU_PMD_TABLE_SIZE);
979                         if (pmdp == NULL)
980                                 early_pgtable_allocfail("pmd");
981                         memset(__nocache_fix(pmdp), 0, SRMMU_PMD_TABLE_SIZE);
982                         pgd_set(__nocache_fix(pgdp), pmdp);
983                 }
984                 pmdp = pmd_offset(__nocache_fix(pgdp), start);
985                 if(srmmu_pmd_none(*(pmd_t *)__nocache_fix(pmdp))) {
986                         ptep = (pte_t *) __srmmu_get_nocache(PTE_SIZE,
987                                                              PTE_SIZE);
988                         if (ptep == NULL)
989                                 early_pgtable_allocfail("pte");
990                         memset(__nocache_fix(ptep), 0, PTE_SIZE);
991                         pmd_set(__nocache_fix(pmdp), ptep);
992                 }
993                 if(what == 1) {
994                         /*
995                          * We bend the rule where all 16 PTPs in a pmd_t point
996                          * inside the same PTE page, and we leak a perfectly
997                          * good hardware PTE piece. Alternatives seem worse.
998                          */
999                         unsigned int x; /* Index of HW PMD in soft cluster */
1000                         x = (start >> PMD_SHIFT) & 15;
1001                         *(unsigned long *)__nocache_fix(&pmdp->pmdv[x]) = prompte;
1002                         start += SRMMU_REAL_PMD_SIZE;
1003                         continue;
1004                 }
1005                 ptep = pte_offset_kernel(__nocache_fix(pmdp), start);
1006                 *(pte_t *)__nocache_fix(ptep) = __pte(prompte);
1007                 start += PAGE_SIZE;
1008         }
1009 }
1010
1011 #define KERNEL_PTE(page_shifted) ((page_shifted)|SRMMU_CACHE|SRMMU_PRIV|SRMMU_VALID)
1012
1013 /* Create a third-level SRMMU 16MB page mapping. */
1014 static void __init do_large_mapping(unsigned long vaddr, unsigned long phys_base)
1015 {
1016         pgd_t *pgdp = pgd_offset_k(vaddr);
1017         unsigned long big_pte;
1018
1019         big_pte = KERNEL_PTE(phys_base >> 4);
1020         *(pgd_t *)__nocache_fix(pgdp) = __pgd(big_pte);
1021 }
1022
1023 /* Map sp_bank entry SP_ENTRY, starting at virtual address VBASE. */
1024 static unsigned long __init map_spbank(unsigned long vbase, int sp_entry)
1025 {
1026         unsigned long pstart = (sp_banks[sp_entry].base_addr & SRMMU_PGDIR_MASK);
1027         unsigned long vstart = (vbase & SRMMU_PGDIR_MASK);
1028         unsigned long vend = SRMMU_PGDIR_ALIGN(vbase + sp_banks[sp_entry].num_bytes);
1029         /* Map "low" memory only */
1030         const unsigned long min_vaddr = PAGE_OFFSET;
1031         const unsigned long max_vaddr = PAGE_OFFSET + SRMMU_MAXMEM;
1032
1033         if (vstart < min_vaddr || vstart >= max_vaddr)
1034                 return vstart;
1035         
1036         if (vend > max_vaddr || vend < min_vaddr)
1037                 vend = max_vaddr;
1038
1039         while(vstart < vend) {
1040                 do_large_mapping(vstart, pstart);
1041                 vstart += SRMMU_PGDIR_SIZE; pstart += SRMMU_PGDIR_SIZE;
1042         }
1043         return vstart;
1044 }
1045
/*
 * Print @msg on the PROM console, announce the halt and call prom_halt().
 *
 * @msg is printed through an explicit "%s" format rather than being used
 * as the format string itself, so a '%' inside the message cannot be
 * misinterpreted as a conversion.  It is const-qualified because it is
 * only read.
 */
static inline void memprobe_error(const char *msg)
{
	prom_printf("%s", msg);
	prom_printf("Halting now...\n");
	prom_halt();
}
1052
1053 static inline void map_kernel(void)
1054 {
1055         int i;
1056
1057         if (phys_base > 0) {
1058                 do_large_mapping(PAGE_OFFSET, phys_base);
1059         }
1060
1061         for (i = 0; sp_banks[i].num_bytes != 0; i++) {
1062                 map_spbank((unsigned long)__va(sp_banks[i].base_addr), i);
1063         }
1064 }
1065
1066 /* Paging initialization on the Sparc Reference MMU. */
1067 extern void sparc_context_init(int);
1068
1069 void (*poke_srmmu)(void) __cpuinitdata = NULL;
1070
1071 extern unsigned long bootmem_init(unsigned long *pages_avail);
1072
1073 void __init srmmu_paging_init(void)
1074 {
1075         int i;
1076         phandle cpunode;
1077         char node_str[128];
1078         pgd_t *pgd;
1079         pmd_t *pmd;
1080         pte_t *pte;
1081         unsigned long pages_avail;
1082
1083         sparc_iomap.start = SUN4M_IOBASE_VADDR; /* 16MB of IOSPACE on all sun4m's. */
1084
1085         if (sparc_cpu_model == sun4d)
1086                 num_contexts = 65536; /* We know it is Viking */
1087         else {
1088                 /* Find the number of contexts on the srmmu. */
1089                 cpunode = prom_getchild(prom_root_node);
1090                 num_contexts = 0;
1091                 while(cpunode != 0) {
1092                         prom_getstring(cpunode, "device_type", node_str, sizeof(node_str));
1093                         if(!strcmp(node_str, "cpu")) {
1094                                 num_contexts = prom_getintdefault(cpunode, "mmu-nctx", 0x8);
1095                                 break;
1096                         }
1097                         cpunode = prom_getsibling(cpunode);
1098                 }
1099         }
1100
1101         if(!num_contexts) {
1102                 prom_printf("Something wrong, can't find cpu node in paging_init.\n");
1103                 prom_halt();
1104         }
1105
1106         pages_avail = 0;
1107         last_valid_pfn = bootmem_init(&pages_avail);
1108
1109         srmmu_nocache_calcsize();
1110         srmmu_nocache_init();
1111         srmmu_inherit_prom_mappings(0xfe400000,(LINUX_OPPROM_ENDVM-PAGE_SIZE));
1112         map_kernel();
1113
1114         /* ctx table has to be physically aligned to its size */
1115         srmmu_context_table = (ctxd_t *)__srmmu_get_nocache(num_contexts*sizeof(ctxd_t), num_contexts*sizeof(ctxd_t));
1116         srmmu_ctx_table_phys = (ctxd_t *)__nocache_pa((unsigned long)srmmu_context_table);
1117
1118         for(i = 0; i < num_contexts; i++)
1119                 srmmu_ctxd_set((ctxd_t *)__nocache_fix(&srmmu_context_table[i]), srmmu_swapper_pg_dir);
1120
1121         flush_cache_all();
1122         srmmu_set_ctable_ptr((unsigned long)srmmu_ctx_table_phys);
1123 #ifdef CONFIG_SMP
1124         /* Stop from hanging here... */
1125         local_ops->tlb_all();
1126 #else
1127         flush_tlb_all();
1128 #endif
1129         poke_srmmu();
1130
1131         srmmu_allocate_ptable_skeleton(sparc_iomap.start, IOBASE_END);
1132         srmmu_allocate_ptable_skeleton(DVMA_VADDR, DVMA_END);
1133
1134         srmmu_allocate_ptable_skeleton(
1135                 __fix_to_virt(__end_of_fixed_addresses - 1), FIXADDR_TOP);
1136         srmmu_allocate_ptable_skeleton(PKMAP_BASE, PKMAP_END);
1137
1138         pgd = pgd_offset_k(PKMAP_BASE);
1139         pmd = pmd_offset(pgd, PKMAP_BASE);
1140         pte = pte_offset_kernel(pmd, PKMAP_BASE);
1141         pkmap_page_table = pte;
1142
1143         flush_cache_all();
1144         flush_tlb_all();
1145
1146         sparc_context_init(num_contexts);
1147
1148         kmap_init();
1149
1150         {
1151                 unsigned long zones_size[MAX_NR_ZONES];
1152                 unsigned long zholes_size[MAX_NR_ZONES];
1153                 unsigned long npages;
1154                 int znum;
1155
1156                 for (znum = 0; znum < MAX_NR_ZONES; znum++)
1157                         zones_size[znum] = zholes_size[znum] = 0;
1158
1159                 npages = max_low_pfn - pfn_base;
1160
1161                 zones_size[ZONE_DMA] = npages;
1162                 zholes_size[ZONE_DMA] = npages - pages_avail;
1163
1164                 npages = highend_pfn - max_low_pfn;
1165                 zones_size[ZONE_HIGHMEM] = npages;
1166                 zholes_size[ZONE_HIGHMEM] = npages - calc_highpages();
1167
1168                 free_area_init_node(0, zones_size, pfn_base, zholes_size);
1169         }
1170 }
1171
1172 void mmu_info(struct seq_file *m)
1173 {
1174         seq_printf(m, 
1175                    "MMU type\t: %s\n"
1176                    "contexts\t: %d\n"
1177                    "nocache total\t: %ld\n"
1178                    "nocache used\t: %d\n",
1179                    srmmu_name,
1180                    num_contexts,
1181                    srmmu_nocache_size,
1182                    srmmu_nocache_map.used << SRMMU_NOCACHE_BITMAP_SHIFT);
1183 }
1184
1185 void destroy_context(struct mm_struct *mm)
1186 {
1187
1188         if(mm->context != NO_CONTEXT) {
1189                 flush_cache_mm(mm);
1190                 srmmu_ctxd_set(&srmmu_context_table[mm->context], srmmu_swapper_pg_dir);
1191                 flush_tlb_mm(mm);
1192                 spin_lock(&srmmu_context_spinlock);
1193                 free_context(mm->context);
1194                 spin_unlock(&srmmu_context_spinlock);
1195                 mm->context = NO_CONTEXT;
1196         }
1197 }
1198
1199 /* Init various srmmu chip types. */
1200 static void __init srmmu_is_bad(void)
1201 {
1202         prom_printf("Could not determine SRMMU chip type.\n");
1203         prom_halt();
1204 }
1205
1206 static void __init init_vac_layout(void)
1207 {
1208         phandle nd;
1209         int cache_lines;
1210         char node_str[128];
1211 #ifdef CONFIG_SMP
1212         int cpu = 0;
1213         unsigned long max_size = 0;
1214         unsigned long min_line_size = 0x10000000;
1215 #endif
1216
1217         nd = prom_getchild(prom_root_node);
1218         while((nd = prom_getsibling(nd)) != 0) {
1219                 prom_getstring(nd, "device_type", node_str, sizeof(node_str));
1220                 if(!strcmp(node_str, "cpu")) {
1221                         vac_line_size = prom_getint(nd, "cache-line-size");
1222                         if (vac_line_size == -1) {
1223                                 prom_printf("can't determine cache-line-size, "
1224                                             "halting.\n");
1225                                 prom_halt();
1226                         }
1227                         cache_lines = prom_getint(nd, "cache-nlines");
1228                         if (cache_lines == -1) {
1229                                 prom_printf("can't determine cache-nlines, halting.\n");
1230                                 prom_halt();
1231                         }
1232
1233                         vac_cache_size = cache_lines * vac_line_size;
1234 #ifdef CONFIG_SMP
1235                         if(vac_cache_size > max_size)
1236                                 max_size = vac_cache_size;
1237                         if(vac_line_size < min_line_size)
1238                                 min_line_size = vac_line_size;
1239                         //FIXME: cpus not contiguous!!
1240                         cpu++;
1241                         if (cpu >= nr_cpu_ids || !cpu_online(cpu))
1242                                 break;
1243 #else
1244                         break;
1245 #endif
1246                 }
1247         }
1248         if(nd == 0) {
1249                 prom_printf("No CPU nodes found, halting.\n");
1250                 prom_halt();
1251         }
1252 #ifdef CONFIG_SMP
1253         vac_cache_size = max_size;
1254         vac_line_size = min_line_size;
1255 #endif
1256         printk("SRMMU: Using VAC size of %d bytes, line size %d bytes.\n",
1257                (int)vac_cache_size, (int)vac_line_size);
1258 }
1259
1260 static void __cpuinit poke_hypersparc(void)
1261 {
1262         volatile unsigned long clear;
1263         unsigned long mreg = srmmu_get_mmureg();
1264
1265         hyper_flush_unconditional_combined();
1266
1267         mreg &= ~(HYPERSPARC_CWENABLE);
1268         mreg |= (HYPERSPARC_CENABLE | HYPERSPARC_WBENABLE);
1269         mreg |= (HYPERSPARC_CMODE);
1270
1271         srmmu_set_mmureg(mreg);
1272
1273 #if 0 /* XXX I think this is bad news... -DaveM */
1274         hyper_clear_all_tags();
1275 #endif
1276
1277         put_ross_icr(HYPERSPARC_ICCR_FTD | HYPERSPARC_ICCR_ICE);
1278         hyper_flush_whole_icache();
1279         clear = srmmu_get_faddr();
1280         clear = srmmu_get_fstatus();
1281 }
1282
1283 static const struct sparc32_cachetlb_ops hypersparc_ops = {
1284         .cache_all      = hypersparc_flush_cache_all,
1285         .cache_mm       = hypersparc_flush_cache_mm,
1286         .cache_page     = hypersparc_flush_cache_page,
1287         .cache_range    = hypersparc_flush_cache_range,
1288         .tlb_all        = hypersparc_flush_tlb_all,
1289         .tlb_mm         = hypersparc_flush_tlb_mm,
1290         .tlb_page       = hypersparc_flush_tlb_page,
1291         .tlb_range      = hypersparc_flush_tlb_range,
1292         .page_to_ram    = hypersparc_flush_page_to_ram,
1293         .sig_insns      = hypersparc_flush_sig_insns,
1294         .page_for_dma   = hypersparc_flush_page_for_dma,
1295 };
1296
1297 static void __init init_hypersparc(void)
1298 {
1299         srmmu_name = "ROSS HyperSparc";
1300         srmmu_modtype = HyperSparc;
1301
1302         init_vac_layout();
1303
1304         is_hypersparc = 1;
1305         sparc32_cachetlb_ops = &hypersparc_ops;
1306
1307         poke_srmmu = poke_hypersparc;
1308
1309         hypersparc_setup_blockops();
1310 }
1311
1312 static void __cpuinit poke_cypress(void)
1313 {
1314         unsigned long mreg = srmmu_get_mmureg();
1315         unsigned long faddr, tagval;
1316         volatile unsigned long cypress_sucks;
1317         volatile unsigned long clear;
1318
1319         clear = srmmu_get_faddr();
1320         clear = srmmu_get_fstatus();
1321
1322         if (!(mreg & CYPRESS_CENABLE)) {
1323                 for(faddr = 0x0; faddr < 0x10000; faddr += 20) {
1324                         __asm__ __volatile__("sta %%g0, [%0 + %1] %2\n\t"
1325                                              "sta %%g0, [%0] %2\n\t" : :
1326                                              "r" (faddr), "r" (0x40000),
1327                                              "i" (ASI_M_DATAC_TAG));
1328                 }
1329         } else {
1330                 for(faddr = 0; faddr < 0x10000; faddr += 0x20) {
1331                         __asm__ __volatile__("lda [%1 + %2] %3, %0\n\t" :
1332                                              "=r" (tagval) :
1333                                              "r" (faddr), "r" (0x40000),
1334                                              "i" (ASI_M_DATAC_TAG));
1335
1336                         /* If modified and valid, kick it. */
1337                         if((tagval & 0x60) == 0x60)
1338                                 cypress_sucks = *(unsigned long *)
1339                                                         (0xf0020000 + faddr);
1340                 }
1341         }
1342
1343         /* And one more, for our good neighbor, Mr. Broken Cypress. */
1344         clear = srmmu_get_faddr();
1345         clear = srmmu_get_fstatus();
1346
1347         mreg |= (CYPRESS_CENABLE | CYPRESS_CMODE);
1348         srmmu_set_mmureg(mreg);
1349 }
1350
1351 static const struct sparc32_cachetlb_ops cypress_ops = {
1352         .cache_all      = cypress_flush_cache_all,
1353         .cache_mm       = cypress_flush_cache_mm,
1354         .cache_page     = cypress_flush_cache_page,
1355         .cache_range    = cypress_flush_cache_range,
1356         .tlb_all        = cypress_flush_tlb_all,
1357         .tlb_mm         = cypress_flush_tlb_mm,
1358         .tlb_page       = cypress_flush_tlb_page,
1359         .tlb_range      = cypress_flush_tlb_range,
1360         .page_to_ram    = cypress_flush_page_to_ram,
1361         .sig_insns      = cypress_flush_sig_insns,
1362         .page_for_dma   = cypress_flush_page_for_dma,
1363 };
1364
1365 static void __init init_cypress_common(void)
1366 {
1367         init_vac_layout();
1368         sparc32_cachetlb_ops = &cypress_ops;
1369         poke_srmmu = poke_cypress;
1370 }
1371
1372 static void __init init_cypress_604(void)
1373 {
1374         srmmu_name = "ROSS Cypress-604(UP)";
1375         srmmu_modtype = Cypress;
1376         init_cypress_common();
1377 }
1378
1379 static void __init init_cypress_605(unsigned long mrev)
1380 {
1381         srmmu_name = "ROSS Cypress-605(MP)";
1382         if(mrev == 0xe) {
1383                 srmmu_modtype = Cypress_vE;
1384                 hwbug_bitmask |= HWBUG_COPYBACK_BROKEN;
1385         } else {
1386                 if(mrev == 0xd) {
1387                         srmmu_modtype = Cypress_vD;
1388                         hwbug_bitmask |= HWBUG_ASIFLUSH_BROKEN;
1389                 } else {
1390                         srmmu_modtype = Cypress;
1391                 }
1392         }
1393         init_cypress_common();
1394 }
1395
1396 static void __cpuinit poke_swift(void)
1397 {
1398         unsigned long mreg;
1399
1400         /* Clear any crap from the cache or else... */
1401         swift_flush_cache_all();
1402
1403         /* Enable I & D caches */
1404         mreg = srmmu_get_mmureg();
1405         mreg |= (SWIFT_IE | SWIFT_DE);
1406         /*
1407          * The Swift branch folding logic is completely broken.  At
1408          * trap time, if things are just right, if can mistakenly
1409          * think that a trap is coming from kernel mode when in fact
1410          * it is coming from user mode (it mis-executes the branch in
1411          * the trap code).  So you see things like crashme completely
1412          * hosing your machine which is completely unacceptable.  Turn
1413          * this shit off... nice job Fujitsu.
1414          */
1415         mreg &= ~(SWIFT_BF);
1416         srmmu_set_mmureg(mreg);
1417 }
1418
1419 static const struct sparc32_cachetlb_ops swift_ops = {
1420         .cache_all      = swift_flush_cache_all,
1421         .cache_mm       = swift_flush_cache_mm,
1422         .cache_page     = swift_flush_cache_page,
1423         .cache_range    = swift_flush_cache_range,
1424         .tlb_all        = swift_flush_tlb_all,
1425         .tlb_mm         = swift_flush_tlb_mm,
1426         .tlb_page       = swift_flush_tlb_page,
1427         .tlb_range      = swift_flush_tlb_range,
1428         .page_to_ram    = swift_flush_page_to_ram,
1429         .sig_insns      = swift_flush_sig_insns,
1430         .page_for_dma   = swift_flush_page_for_dma,
1431 };
1432
1433 #define SWIFT_MASKID_ADDR  0x10003018
1434 static void __init init_swift(void)
1435 {
1436         unsigned long swift_rev;
1437
1438         __asm__ __volatile__("lda [%1] %2, %0\n\t"
1439                              "srl %0, 0x18, %0\n\t" :
1440                              "=r" (swift_rev) :
1441                              "r" (SWIFT_MASKID_ADDR), "i" (ASI_M_BYPASS));
1442         srmmu_name = "Fujitsu Swift";
1443         switch(swift_rev) {
1444         case 0x11:
1445         case 0x20:
1446         case 0x23:
1447         case 0x30:
1448                 srmmu_modtype = Swift_lots_o_bugs;
1449                 hwbug_bitmask |= (HWBUG_KERN_ACCBROKEN | HWBUG_KERN_CBITBROKEN);
1450                 /*
1451                  * Gee george, I wonder why Sun is so hush hush about
1452                  * this hardware bug... really braindamage stuff going
1453                  * on here.  However I think we can find a way to avoid
1454                  * all of the workaround overhead under Linux.  Basically,
1455                  * any page fault can cause kernel pages to become user
1456                  * accessible (the mmu gets confused and clears some of
1457                  * the ACC bits in kernel ptes).  Aha, sounds pretty
1458                  * horrible eh?  But wait, after extensive testing it appears
1459                  * that if you use pgd_t level large kernel pte's (like the
1460                  * 4MB pages on the Pentium) the bug does not get tripped
1461                  * at all.  This avoids almost all of the major overhead.
1462                  * Welcome to a world where your vendor tells you to,
1463                  * "apply this kernel patch" instead of "sorry for the
1464                  * broken hardware, send it back and we'll give you
1465                  * properly functioning parts"
1466                  */
1467                 break;
1468         case 0x25:
1469         case 0x31:
1470                 srmmu_modtype = Swift_bad_c;
1471                 hwbug_bitmask |= HWBUG_KERN_CBITBROKEN;
1472                 /*
1473                  * You see Sun allude to this hardware bug but never
1474                  * admit things directly, they'll say things like,
1475                  * "the Swift chip cache problems" or similar.
1476                  */
1477                 break;
1478         default:
1479                 srmmu_modtype = Swift_ok;
1480                 break;
1481         }
1482
1483         sparc32_cachetlb_ops = &swift_ops;
1484         flush_page_for_dma_global = 0;
1485
1486         /*
1487          * Are you now convinced that the Swift is one of the
1488          * biggest VLSI abortions of all time?  Bravo Fujitsu!
1489          * Fujitsu, the !#?!%$'d up processor people.  I bet if
1490          * you examined the microcode of the Swift you'd find
1491          * XXX's all over the place.
1492          */
1493         poke_srmmu = poke_swift;
1494 }
1495
/*
 * Flush everything: call flush_user_windows(), then clear both caches
 * with turbosparc_idflash_clear().
 */
static void turbosparc_flush_cache_all(void)
{
	flush_user_windows();
	turbosparc_idflash_clear();
}
1501
1502 static void turbosparc_flush_cache_mm(struct mm_struct *mm)
1503 {
1504         FLUSH_BEGIN(mm)
1505         flush_user_windows();
1506         turbosparc_idflash_clear();
1507         FLUSH_END
1508 }
1509
1510 static void turbosparc_flush_cache_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
1511 {
1512         FLUSH_BEGIN(vma->vm_mm)
1513         flush_user_windows();
1514         turbosparc_idflash_clear();
1515         FLUSH_END
1516 }
1517
1518 static void turbosparc_flush_cache_page(struct vm_area_struct *vma, unsigned long page)
1519 {
1520         FLUSH_BEGIN(vma->vm_mm)
1521         flush_user_windows();
1522         if (vma->vm_flags & VM_EXEC)
1523                 turbosparc_flush_icache();
1524         turbosparc_flush_dcache();
1525         FLUSH_END
1526 }
1527
/*
 * Write a page back to RAM.  This is a no-op unless TURBOSPARC_WRITEBACK
 * is defined: the TurboSparc could run copy-back if it were switched on,
 * but that does not work.  With the macro defined, the page's cache
 * contents are flushed when the address probes as mapped, and the fault
 * status register is then read (value discarded).
 */
static void turbosparc_flush_page_to_ram(unsigned long page)
{
#ifdef TURBOSPARC_WRITEBACK
	volatile unsigned long clear;

	if (srmmu_hwprobe(page))
		turbosparc_flush_page_cache(page);
	clear = srmmu_get_fstatus();
#endif
}
1539
/* Signal-trampoline instruction flush: intentionally empty on TurboSparc. */
static void turbosparc_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr)
{
}
1543
/*
 * Prepare a page for DMA.  @page is not used; the whole data cache is
 * flushed.
 */
static void turbosparc_flush_page_for_dma(unsigned long page)
{
	turbosparc_flush_dcache();
}
1548
/* Flush the entire TLB. */
static void turbosparc_flush_tlb_all(void)
{
	srmmu_flush_whole_tlb();
}
1553
1554 static void turbosparc_flush_tlb_mm(struct mm_struct *mm)
1555 {
1556         FLUSH_BEGIN(mm)
1557         srmmu_flush_whole_tlb();
1558         FLUSH_END
1559 }
1560
1561 static void turbosparc_flush_tlb_range(struct vm_area_struct *vma, unsigned long start, unsigned long end)
1562 {
1563         FLUSH_BEGIN(vma->vm_mm)
1564         srmmu_flush_whole_tlb();
1565         FLUSH_END
1566 }
1567
1568 static void turbosparc_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
1569 {
1570         FLUSH_BEGIN(vma->vm_mm)
1571         srmmu_flush_whole_tlb();
1572         FLUSH_END
1573 }
1574
1575
1576 static void __cpuinit poke_turbosparc(void)
1577 {
1578         unsigned long mreg = srmmu_get_mmureg();
1579         unsigned long ccreg;
1580
1581         /* Clear any crap from the cache or else... */
1582         turbosparc_flush_cache_all();
1583         mreg &= ~(TURBOSPARC_ICENABLE | TURBOSPARC_DCENABLE); /* Temporarily disable I & D caches */
1584         mreg &= ~(TURBOSPARC_PCENABLE);         /* Don't check parity */
1585         srmmu_set_mmureg(mreg);
1586         
1587         ccreg = turbosparc_get_ccreg();
1588
1589 #ifdef TURBOSPARC_WRITEBACK
1590         ccreg |= (TURBOSPARC_SNENABLE);         /* Do DVMA snooping in Dcache */
1591         ccreg &= ~(TURBOSPARC_uS2 | TURBOSPARC_WTENABLE);
1592                         /* Write-back D-cache, emulate VLSI
1593                          * abortion number three, not number one */
1594 #else
1595         /* For now let's play safe, optimize later */
1596         ccreg |= (TURBOSPARC_SNENABLE | TURBOSPARC_WTENABLE);
1597                         /* Do DVMA snooping in Dcache, Write-thru D-cache */
1598         ccreg &= ~(TURBOSPARC_uS2);
1599                         /* Emulate VLSI abortion number three, not number one */
1600 #endif
1601
1602         switch (ccreg & 7) {
1603         case 0: /* No SE cache */
1604         case 7: /* Test mode */
1605                 break;
1606         default:
1607                 ccreg |= (TURBOSPARC_SCENABLE);
1608         }
1609         turbosparc_set_ccreg (ccreg);
1610
1611         mreg |= (TURBOSPARC_ICENABLE | TURBOSPARC_DCENABLE); /* I & D caches on */
1612         mreg |= (TURBOSPARC_ICSNOOP);           /* Icache snooping on */
1613         srmmu_set_mmureg(mreg);
1614 }
1615
1616 static const struct sparc32_cachetlb_ops turbosparc_ops = {
1617         .cache_all      = turbosparc_flush_cache_all,
1618         .cache_mm       = turbosparc_flush_cache_mm,
1619         .cache_page     = turbosparc_flush_cache_page,
1620         .cache_range    = turbosparc_flush_cache_range,
1621         .tlb_all        = turbosparc_flush_tlb_all,
1622         .tlb_mm         = turbosparc_flush_tlb_mm,
1623         .tlb_page       = turbosparc_flush_tlb_page,
1624         .tlb_range      = turbosparc_flush_tlb_range,
1625         .page_to_ram    = turbosparc_flush_page_to_ram,
1626         .sig_insns      = turbosparc_flush_sig_insns,
1627         .page_for_dma   = turbosparc_flush_page_for_dma,
1628 };
1629
1630 static void __init init_turbosparc(void)
1631 {
1632         srmmu_name = "Fujitsu TurboSparc";
1633         srmmu_modtype = TurboSparc;
1634         sparc32_cachetlb_ops = &turbosparc_ops;
1635         poke_srmmu = poke_turbosparc;
1636 }
1637
1638 static void __cpuinit poke_tsunami(void)
1639 {
1640         unsigned long mreg = srmmu_get_mmureg();
1641
1642         tsunami_flush_icache();
1643         tsunami_flush_dcache();
1644         mreg &= ~TSUNAMI_ITD;
1645         mreg |= (TSUNAMI_IENAB | TSUNAMI_DENAB);
1646         srmmu_set_mmureg(mreg);
1647 }
1648
1649 static const struct sparc32_cachetlb_ops tsunami_ops = {
1650         .cache_all      = tsunami_flush_cache_all,
1651         .cache_mm       = tsunami_flush_cache_mm,
1652         .cache_page     = tsunami_flush_cache_page,
1653         .cache_range    = tsunami_flush_cache_range,
1654         .tlb_all        = tsunami_flush_tlb_all,
1655         .tlb_mm         = tsunami_flush_tlb_mm,
1656         .tlb_page       = tsunami_flush_tlb_page,
1657         .tlb_range      = tsunami_flush_tlb_range,
1658         .page_to_ram    = tsunami_flush_page_to_ram,
1659         .sig_insns      = tsunami_flush_sig_insns,
1660         .page_for_dma   = tsunami_flush_page_for_dma,
1661 };
1662
1663 static void __init init_tsunami(void)
1664 {
1665         /*
1666          * Tsunami's pretty sane, Sun and TI actually got it
1667          * somewhat right this time.  Fujitsu should have
1668          * taken some lessons from them.
1669          */
1670
1671         srmmu_name = "TI Tsunami";
1672         srmmu_modtype = Tsunami;
1673         sparc32_cachetlb_ops = &tsunami_ops;
1674         poke_srmmu = poke_tsunami;
1675
1676         tsunami_setup_blockops();
1677 }
1678
1679 static void __cpuinit poke_viking(void)
1680 {
1681         unsigned long mreg = srmmu_get_mmureg();
1682         static int smp_catch;
1683
1684         if (viking_mxcc_present) {
1685                 unsigned long mxcc_control = mxcc_get_creg();
1686
1687                 mxcc_control |= (MXCC_CTL_ECE | MXCC_CTL_PRE | MXCC_CTL_MCE);
1688                 mxcc_control &= ~(MXCC_CTL_RRC);
1689                 mxcc_set_creg(mxcc_control);
1690
1691                 /*
1692                  * We don't need memory parity checks.
1693                  * XXX This is a mess, have to dig out later. ecd.
1694                 viking_mxcc_turn_off_parity(&mreg, &mxcc_control);
1695                  */
1696
1697                 /* We do cache ptables on MXCC. */
1698                 mreg |= VIKING_TCENABLE;
1699         } else {
1700                 unsigned long bpreg;
1701
1702                 mreg &= ~(VIKING_TCENABLE);
1703                 if(smp_catch++) {
1704                         /* Must disable mixed-cmd mode here for other cpu's. */
1705                         bpreg = viking_get_bpreg();
1706                         bpreg &= ~(VIKING_ACTION_MIX);
1707                         viking_set_bpreg(bpreg);
1708
1709                         /* Just in case PROM does something funny. */
1710                         msi_set_sync();
1711                 }
1712         }
1713
1714         mreg |= VIKING_SPENABLE;
1715         mreg |= (VIKING_ICENABLE | VIKING_DCENABLE);
1716         mreg |= VIKING_SBENABLE;
1717         mreg &= ~(VIKING_ACENABLE);
1718         srmmu_set_mmureg(mreg);
1719 }
1720
1721 static struct sparc32_cachetlb_ops viking_ops = {
1722         .cache_all      = viking_flush_cache_all,
1723         .cache_mm       = viking_flush_cache_mm,
1724         .cache_page     = viking_flush_cache_page,
1725         .cache_range    = viking_flush_cache_range,
1726         .tlb_all        = viking_flush_tlb_all,
1727         .tlb_mm         = viking_flush_tlb_mm,
1728         .tlb_page       = viking_flush_tlb_page,
1729         .tlb_range      = viking_flush_tlb_range,
1730         .page_to_ram    = viking_flush_page_to_ram,
1731         .sig_insns      = viking_flush_sig_insns,
1732         .page_for_dma   = viking_flush_page_for_dma,
1733 };
1734
1735 #ifdef CONFIG_SMP
1736 /* On sun4d the cpu broadcasts local TLB flushes, so we can just
1737  * perform the local TLB flush and all the other cpus will see it.
1738  * But, unfortunately, there is a bug in the sun4d XBUS backplane
1739  * that requires that we add some synchronization to these flushes.
1740  *
1741  * The bug is that the fifo which keeps track of all the pending TLB
1742  * broadcasts in the system is an entry or two too small, so if we
1743  * have too many going at once we'll overflow that fifo and lose a TLB
1744  * flush resulting in corruption.
1745  *
1746  * Our workaround is to take a global spinlock around the TLB flushes,
1747  * which guarentees we won't ever have too many pending.  It's a big
1748  * hammer, but a semaphore like system to make sure we only have N TLB
1749  * flushes going at once will require SMP locking anyways so there's
1750  * no real value in trying any harder than this.
1751  */
1752 static struct sparc32_cachetlb_ops viking_sun4d_smp_ops = {
1753         .cache_all      = viking_flush_cache_all,
1754         .cache_mm       = viking_flush_cache_mm,
1755         .cache_page     = viking_flush_cache_page,
1756         .cache_range    = viking_flush_cache_range,
1757         .tlb_all        = sun4dsmp_flush_tlb_all,
1758         .tlb_mm         = sun4dsmp_flush_tlb_mm,
1759         .tlb_page       = sun4dsmp_flush_tlb_page,
1760         .tlb_range      = sun4dsmp_flush_tlb_range,
1761         .page_to_ram    = viking_flush_page_to_ram,
1762         .sig_insns      = viking_flush_sig_insns,
1763         .page_for_dma   = viking_flush_page_for_dma,
1764 };
1765 #endif
1766
1767 static void __init init_viking(void)
1768 {
1769         unsigned long mreg = srmmu_get_mmureg();
1770
1771         /* Ahhh, the viking.  SRMMU VLSI abortion number two... */
1772         if(mreg & VIKING_MMODE) {
1773                 srmmu_name = "TI Viking";
1774                 viking_mxcc_present = 0;
1775                 msi_set_sync();
1776
1777                 /*
1778                  * We need this to make sure old viking takes no hits
1779                  * on it's cache for dma snoops to workaround the
1780                  * "load from non-cacheable memory" interrupt bug.
1781                  * This is only necessary because of the new way in
1782                  * which we use the IOMMU.
1783                  */
1784                 viking_ops.page_for_dma = viking_flush_page;
1785 #ifdef CONFIG_SMP
1786                 viking_sun4d_smp_ops.page_for_dma = viking_flush_page;
1787 #endif
1788                 flush_page_for_dma_global = 0;
1789         } else {
1790                 srmmu_name = "TI Viking/MXCC";
1791                 viking_mxcc_present = 1;
1792                 srmmu_cache_pagetables = 1;
1793         }
1794
1795         sparc32_cachetlb_ops = (const struct sparc32_cachetlb_ops *)
1796                 &viking_ops;
1797 #ifdef CONFIG_SMP
1798         if (sparc_cpu_model == sun4d)
1799                 sparc32_cachetlb_ops = (const struct sparc32_cachetlb_ops *)
1800                         &viking_sun4d_smp_ops;
1801 #endif
1802
1803         poke_srmmu = poke_viking;
1804 }
1805
1806 #ifdef CONFIG_SPARC_LEON
/*
 * LEON .cache_mm method: the mm argument is ignored and the request is
 * served by leon_flush_cache_all().
 */
static void leon_flush_cache_mm(struct mm_struct *mm)
{
	leon_flush_cache_all();
}
1811
/*
 * LEON .cache_page method: forwards both arguments unchanged to
 * leon_flush_pcache_all().
 */
static void leon_flush_cache_page(struct vm_area_struct *vma,
				  unsigned long page)
{
	leon_flush_pcache_all(vma, page);
}
1816
/*
 * LEON .cache_range method: vma, start and end are all ignored and the
 * request is served by leon_flush_cache_all().
 */
static void leon_flush_cache_range(struct vm_area_struct *vma,
				   unsigned long start, unsigned long end)
{
	leon_flush_cache_all();
}
1823
/*
 * LEON .tlb_mm method: the mm argument is ignored and the request is
 * served by leon_flush_tlb_all().
 */
static void leon_flush_tlb_mm(struct mm_struct *mm)
{
	leon_flush_tlb_all();
}
1828
/*
 * LEON .tlb_page method: vma and page are ignored and the request is
 * served by leon_flush_tlb_all().
 */
static void leon_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
{
	leon_flush_tlb_all();
}
1834
/*
 * LEON .tlb_range method: vma, start and end are all ignored and the
 * request is served by leon_flush_tlb_all().
 */
static void leon_flush_tlb_range(struct vm_area_struct *vma,
				 unsigned long start, unsigned long end)
{
	leon_flush_tlb_all();
}
1841
/*
 * LEON .page_to_ram method: the page argument is ignored and the request
 * is served by leon_flush_cache_all().
 */
static void leon_flush_page_to_ram(unsigned long page)
{
	leon_flush_cache_all();
}
1846
/*
 * LEON .sig_insns method: mm and page are ignored and the request is
 * served by leon_flush_cache_all().
 */
static void leon_flush_sig_insns(struct mm_struct *mm,
				 unsigned long page)
{
	leon_flush_cache_all();
}
1851
/*
 * LEON .page_for_dma method: the page argument is ignored and the request
 * is served by leon_flush_dcache_all().
 */
static void leon_flush_page_for_dma(unsigned long page)
{
	leon_flush_dcache_all();
}
1856
1857 void __init poke_leonsparc(void)
1858 {
1859 }
1860
1861 static const struct sparc32_cachetlb_ops leon_ops = {
1862         .cache_all      = leon_flush_cache_all,
1863         .cache_mm       = leon_flush_cache_mm,
1864         .cache_page     = leon_flush_cache_page,
1865         .cache_range    = leon_flush_cache_range,
1866         .tlb_all        = leon_flush_tlb_all,
1867         .tlb_mm         = leon_flush_tlb_mm,
1868         .tlb_page       = leon_flush_tlb_page,
1869         .tlb_range      = leon_flush_tlb_range,
1870         .page_to_ram    = leon_flush_page_to_ram,
1871         .sig_insns      = leon_flush_sig_insns,
1872         .page_for_dma   = leon_flush_page_for_dma,
1873 };
1874
1875 void __init init_leon(void)
1876 {
1877         srmmu_name = "LEON";
1878         sparc32_cachetlb_ops = &leon_ops;
1879         poke_srmmu = poke_leonsparc;
1880
1881         srmmu_cache_pagetables = 0;
1882
1883         leon_flush_during_switch = leon_flush_needed();
1884 }
1885 #endif
1886
1887 /* Probe for the srmmu chip version. */
1888 static void __init get_srmmu_type(void)
1889 {
1890         unsigned long mreg, psr;
1891         unsigned long mod_typ, mod_rev, psr_typ, psr_vers;
1892
1893         srmmu_modtype = SRMMU_INVAL_MOD;
1894         hwbug_bitmask = 0;
1895
1896         mreg = srmmu_get_mmureg(); psr = get_psr();
1897         mod_typ = (mreg & 0xf0000000) >> 28;
1898         mod_rev = (mreg & 0x0f000000) >> 24;
1899         psr_typ = (psr >> 28) & 0xf;
1900         psr_vers = (psr >> 24) & 0xf;
1901
1902         /* First, check for sparc-leon. */
1903         if (sparc_cpu_model == sparc_leon) {
1904                 init_leon();
1905                 return;
1906         }
1907
1908         /* Second, check for HyperSparc or Cypress. */
1909         if(mod_typ == 1) {
1910                 switch(mod_rev) {
1911                 case 7:
1912                         /* UP or MP Hypersparc */
1913                         init_hypersparc();
1914                         break;
1915                 case 0:
1916                 case 2:
1917                         /* Uniprocessor Cypress */
1918                         init_cypress_604();
1919                         break;
1920                 case 10:
1921                 case 11:
1922                 case 12:
1923                         /* _REALLY OLD_ Cypress MP chips... */
1924                 case 13:
1925                 case 14:
1926                 case 15:
1927                         /* MP Cypress mmu/cache-controller */
1928                         init_cypress_605(mod_rev);
1929                         break;
1930                 default:
1931                         /* Some other Cypress revision, assume a 605. */
1932                         init_cypress_605(mod_rev);
1933                         break;
1934                 }
1935                 return;
1936         }
1937         
1938         /*
1939          * Now Fujitsu TurboSparc. It might happen that it is
1940          * in Swift emulation mode, so we will check later...
1941          */
1942         if (psr_typ == 0 && psr_vers == 5) {
1943                 init_turbosparc();
1944                 return;
1945         }
1946
1947         /* Next check for Fujitsu Swift. */
1948         if(psr_typ == 0 && psr_vers == 4) {
1949                 phandle cpunode;
1950                 char node_str[128];
1951
1952                 /* Look if it is not a TurboSparc emulating Swift... */
1953                 cpunode = prom_getchild(prom_root_node);
1954                 while((cpunode = prom_getsibling(cpunode)) != 0) {
1955                         prom_getstring(cpunode, "device_type", node_str, sizeof(node_str));
1956                         if(!strcmp(node_str, "cpu")) {
1957                                 if (!prom_getintdefault(cpunode, "psr-implementation", 1) &&
1958                                     prom_getintdefault(cpunode, "psr-version", 1) == 5) {
1959                                         init_turbosparc();
1960                                         return;
1961                                 }
1962                                 break;
1963                         }
1964                 }
1965                 
1966                 init_swift();
1967                 return;
1968         }
1969
1970         /* Now the Viking family of srmmu. */
1971         if(psr_typ == 4 &&
1972            ((psr_vers == 0) ||
1973             ((psr_vers == 1) && (mod_typ == 0) && (mod_rev == 0)))) {
1974                 init_viking();
1975                 return;
1976         }
1977
1978         /* Finally the Tsunami. */
1979         if(psr_typ == 4 && psr_vers == 1 && (mod_typ || mod_rev)) {
1980                 init_tsunami();
1981                 return;
1982         }
1983
1984         /* Oh well */
1985         srmmu_is_bad();
1986 }
1987
1988 #ifdef CONFIG_SMP
1989 /* Local cross-calls. */
1990 static void smp_flush_page_for_dma(unsigned long page)
1991 {
1992         xc1((smpfunc_t) local_ops->page_for_dma, page);
1993         local_ops->page_for_dma(page);
1994 }
1995
1996 static void smp_flush_cache_all(void)
1997 {
1998         xc0((smpfunc_t) local_ops->cache_all);
1999         local_ops->cache_all();
2000 }
2001
2002 static void smp_flush_tlb_all(void)
2003 {
2004         xc0((smpfunc_t) local_ops->tlb_all);
2005         local_ops->tlb_all();
2006 }
2007
2008 static void smp_flush_cache_mm(struct mm_struct *mm)
2009 {
2010         if (mm->context != NO_CONTEXT) {
2011                 cpumask_t cpu_mask;
2012                 cpumask_copy(&cpu_mask, mm_cpumask(mm));
2013                 cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
2014                 if (!cpumask_empty(&cpu_mask))
2015                         xc1((smpfunc_t) local_ops->cache_mm, (unsigned long) mm);
2016                 local_ops->cache_mm(mm);
2017         }
2018 }
2019
2020 static void smp_flush_tlb_mm(struct mm_struct *mm)
2021 {
2022         if (mm->context != NO_CONTEXT) {
2023                 cpumask_t cpu_mask;
2024                 cpumask_copy(&cpu_mask, mm_cpumask(mm));
2025                 cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
2026                 if (!cpumask_empty(&cpu_mask)) {
2027                         xc1((smpfunc_t) local_ops->tlb_mm, (unsigned long) mm);
2028                         if (atomic_read(&mm->mm_users) == 1 && current->active_mm == mm)
2029                                 cpumask_copy(mm_cpumask(mm),
2030                                              cpumask_of(smp_processor_id()));
2031                 }
2032                 local_ops->tlb_mm(mm);
2033         }
2034 }
2035
2036 static void smp_flush_cache_range(struct vm_area_struct *vma,
2037                                   unsigned long start,
2038                                   unsigned long end)
2039 {
2040         struct mm_struct *mm = vma->vm_mm;
2041
2042         if (mm->context != NO_CONTEXT) {
2043                 cpumask_t cpu_mask;
2044                 cpumask_copy(&cpu_mask, mm_cpumask(mm));
2045                 cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
2046                 if (!cpumask_empty(&cpu_mask))
2047                         xc3((smpfunc_t) local_ops->cache_range,
2048                             (unsigned long) vma, start, end);
2049                 local_ops->cache_range(vma, start, end);
2050         }
2051 }
2052
2053 static void smp_flush_tlb_range(struct vm_area_struct *vma,
2054                                 unsigned long start,
2055                                 unsigned long end)
2056 {
2057         struct mm_struct *mm = vma->vm_mm;
2058
2059         if (mm->context != NO_CONTEXT) {
2060                 cpumask_t cpu_mask;
2061                 cpumask_copy(&cpu_mask, mm_cpumask(mm));
2062                 cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
2063                 if (!cpumask_empty(&cpu_mask))
2064                         xc3((smpfunc_t) local_ops->tlb_range,
2065                             (unsigned long) vma, start, end);
2066                 local_ops->tlb_range(vma, start, end);
2067         }
2068 }
2069
2070 static void smp_flush_cache_page(struct vm_area_struct *vma, unsigned long page)
2071 {
2072         struct mm_struct *mm = vma->vm_mm;
2073
2074         if (mm->context != NO_CONTEXT) {
2075                 cpumask_t cpu_mask;
2076                 cpumask_copy(&cpu_mask, mm_cpumask(mm));
2077                 cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
2078                 if (!cpumask_empty(&cpu_mask))
2079                         xc2((smpfunc_t) local_ops->cache_page,
2080                             (unsigned long) vma, page);
2081                 local_ops->cache_page(vma, page);
2082         }
2083 }
2084
2085 static void smp_flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
2086 {
2087         struct mm_struct *mm = vma->vm_mm;
2088
2089         if (mm->context != NO_CONTEXT) {
2090                 cpumask_t cpu_mask;
2091                 cpumask_copy(&cpu_mask, mm_cpumask(mm));
2092                 cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
2093                 if (!cpumask_empty(&cpu_mask))
2094                         xc2((smpfunc_t) local_ops->tlb_page,
2095                             (unsigned long) vma, page);
2096                 local_ops->tlb_page(vma, page);
2097         }
2098 }
2099
2100 static void smp_flush_page_to_ram(unsigned long page)
2101 {
2102         /* Current theory is that those who call this are the one's
2103          * who have just dirtied their cache with the pages contents
2104          * in kernel space, therefore we only run this on local cpu.
2105          *
2106          * XXX This experiment failed, research further... -DaveM
2107          */
2108 #if 1
2109         xc1((smpfunc_t) local_ops->page_to_ram, page);
2110 #endif
2111         local_ops->page_to_ram(page);
2112 }
2113
2114 static void smp_flush_sig_insns(struct mm_struct *mm, unsigned long insn_addr)
2115 {
2116         cpumask_t cpu_mask;
2117         cpumask_copy(&cpu_mask, mm_cpumask(mm));
2118         cpumask_clear_cpu(smp_processor_id(), &cpu_mask);
2119         if (!cpumask_empty(&cpu_mask))
2120                 xc2((smpfunc_t) local_ops->sig_insns,
2121                     (unsigned long) mm, insn_addr);
2122         local_ops->sig_insns(mm, insn_addr);
2123 }
2124
2125 static struct sparc32_cachetlb_ops smp_cachetlb_ops = {
2126         .cache_all      = smp_flush_cache_all,
2127         .cache_mm       = smp_flush_cache_mm,
2128         .cache_page     = smp_flush_cache_page,
2129         .cache_range    = smp_flush_cache_range,
2130         .tlb_all        = smp_flush_tlb_all,
2131         .tlb_mm         = smp_flush_tlb_mm,
2132         .tlb_page       = smp_flush_tlb_page,
2133         .tlb_range      = smp_flush_tlb_range,
2134         .page_to_ram    = smp_flush_page_to_ram,
2135         .sig_insns      = smp_flush_sig_insns,
2136         .page_for_dma   = smp_flush_page_for_dma,
2137 };
2138 #endif
2139
2140 /* Load up routines and constants for sun4m and sun4d mmu */
2141 void __init load_mmu(void)
2142 {
2143         extern void ld_mmu_iommu(void);
2144         extern void ld_mmu_iounit(void);
2145
2146         /* Functions */
2147         get_srmmu_type();
2148
2149 #ifdef CONFIG_SMP
2150         /* El switcheroo... */
2151         local_ops = sparc32_cachetlb_ops;
2152
2153         if (sparc_cpu_model == sun4d || sparc_cpu_model == sparc_leon) {
2154                 smp_cachetlb_ops.tlb_all = local_ops->tlb_all;
2155                 smp_cachetlb_ops.tlb_mm = local_ops->tlb_mm;
2156                 smp_cachetlb_ops.tlb_range = local_ops->tlb_range;
2157                 smp_cachetlb_ops.tlb_page = local_ops->tlb_page;
2158         }
2159
2160         if (poke_srmmu == poke_viking) {
2161                 /* Avoid unnecessary cross calls. */
2162                 smp_cachetlb_ops.cache_all = local_ops->cache_all;
2163                 smp_cachetlb_ops.cache_mm = local_ops->cache_mm;
2164                 smp_cachetlb_ops.cache_range = local_ops->cache_range;
2165                 smp_cachetlb_ops.cache_page = local_ops->cache_page;
2166
2167                 smp_cachetlb_ops.page_to_ram = local_ops->page_to_ram;
2168                 smp_cachetlb_ops.sig_insns = local_ops->sig_insns;
2169                 smp_cachetlb_ops.page_for_dma = local_ops->page_for_dma;
2170         }
2171
2172         /* It really is const after this point. */
2173         sparc32_cachetlb_ops = (const struct sparc32_cachetlb_ops *)
2174                 &smp_cachetlb_ops;
2175 #endif
2176
2177         if (sparc_cpu_model == sun4d)
2178                 ld_mmu_iounit();
2179         else
2180                 ld_mmu_iommu();
2181 #ifdef CONFIG_SMP
2182         if (sparc_cpu_model == sun4d)
2183                 sun4d_init_smp();
2184         else if (sparc_cpu_model == sparc_leon)
2185                 leon_init_smp();
2186         else
2187                 sun4m_init_smp();
2188 #endif
2189         btfixup();
2190 }