/* arch/x86/hyperv/mmu.c */
#define pr_fmt(fmt)  "Hyper-V: " fmt

#include <linux/hyperv.h>
#include <linux/log2.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/types.h>

#include <asm/fpu/api.h>
#include <asm/mshyperv.h>
#include <asm/msr.h>
#include <asm/tlbflush.h>

#define CREATE_TRACE_POINTS
#include <asm/trace/hyperv.h>
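
/*
 * CREATE_TRACE_POINTS is defined in exactly one compilation unit before
 * including the trace header so that the tracepoints it declares (here
 * trace_hyperv_mmu_flush_tlb_others) are instantiated rather than merely
 * declared.
 */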

/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
struct hv_flush_pcpu {
        u64 address_space;
        u64 flags;
        u64 processor_mask;
        u64 gva_list[];
};

/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
struct hv_flush_pcpu_ex {
        u64 address_space;
        u64 flags;
        struct {
                u64 format;
                u64 valid_bank_mask;
                u64 bank_contents[];
        } hv_vp_set;
        u64 gva_list[];
};

/* Each gva in gva_list encodes up to 4096 pages to flush */
#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)
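/*
 * With 4K pages that is 16 MB of virtual address space per entry: the low
 * 12 bits of an entry count up to 4095 *additional* pages on top of the
 * page named by the entry itself (see fill_gva_list() below).
 */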

static struct hv_flush_pcpu __percpu *pcpu_flush;

static struct hv_flush_pcpu_ex __percpu *pcpu_flush_ex;

/*
 * Fills in gva_list starting from offset. Returns the number of items added.
 */
static inline int fill_gva_list(u64 gva_list[], int offset,
                                unsigned long start, unsigned long end)
{
        int gva_n = offset;
        unsigned long cur = start, diff;

        do {
                diff = end > cur ? end - cur : 0;

                gva_list[gva_n] = cur & PAGE_MASK;
                /*
                 * Lower 12 bits encode the number of additional
                 * pages to flush (in addition to the 'cur' page).
                 */
                if (diff >= HV_TLB_FLUSH_UNIT)
                        gva_list[gva_n] |= ~PAGE_MASK;
                else if (diff)
                        gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;

                cur += HV_TLB_FLUSH_UNIT;
                gva_n++;

        } while (cur < end);

        return gva_n - offset;
}
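
/*
 * Worked example: fill_gva_list(gva_list, 0, 0x7f0000000000, 0x7f0000005000)
 * describes five 4K pages with a single entry. diff = 0x5000 is below
 * HV_TLB_FLUSH_UNIT, so
 *   gva_list[0] = 0x7f0000000000 | ((0x5000 - 1) >> PAGE_SHIFT)
 *               = 0x7f0000000004,
 * i.e. page 0x7f0000000000 plus 4 additional pages, and the function
 * returns 1.
 */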

/* Return the number of banks in the resulting vp_set */
static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush,
                                    const struct cpumask *cpus)
{
        int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;

        /*
         * The per-cpu buffer is reused across calls, so clear any stale
         * bank contents first; otherwise vCPUs flushed on a previous
         * call would be flushed again. Some banks may end up being
         * empty afterwards but this is acceptable.
         */
        memset(flush->hv_vp_set.bank_contents, 0,
               64 * sizeof(flush->hv_vp_set.bank_contents[0]));

        for_each_cpu(cpu, cpus) {
                vcpu = hv_cpu_number_to_vp_number(cpu);
                vcpu_bank = vcpu / 64;
                vcpu_offset = vcpu % 64;

                /* valid_bank_mask can represent up to 64 banks */
                if (vcpu_bank >= 64)
                        return 0;

                __set_bit(vcpu_offset, (unsigned long *)
                          &flush->hv_vp_set.bank_contents[vcpu_bank]);
                if (vcpu_bank >= nr_bank)
                        nr_bank = vcpu_bank + 1;
        }
        flush->hv_vp_set.valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0);

        return nr_bank;
}
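
/*
 * Worked example: for target vCPUs 1 and 67, bit 1 is set in
 * bank_contents[0] and bit 3 (67 % 64) in bank_contents[1], giving
 * nr_bank = 2 and valid_bank_mask = GENMASK_ULL(1, 0) = 0x3.
 */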

static void hyperv_flush_tlb_others(const struct cpumask *cpus,
                                    const struct flush_tlb_info *info)
{
        int cpu, vcpu, gva_n, max_gvas;
        struct hv_flush_pcpu *flush;
        u64 status = U64_MAX;
        unsigned long flags;

        trace_hyperv_mmu_flush_tlb_others(cpus, info);

        if (!pcpu_flush || !hv_hypercall_pg)
                goto do_native;

        if (cpumask_empty(cpus))
                return;

        local_irq_save(flags);

        flush = this_cpu_ptr(pcpu_flush);

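        /*
         * address_space is the guest physical address of the page table
         * root for the target mm, i.e. the CR3 value the vCPUs use for
         * that address space; address_space == 0 combined with the
         * ALL_VIRTUAL_ADDRESS_SPACES flag asks the hypervisor to flush
         * every address space instead.
         */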
        if (info->mm) {
                flush->address_space = virt_to_phys(info->mm->pgd);
                flush->flags = 0;
        } else {
                flush->address_space = 0;
                flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
        }

        flush->processor_mask = 0;
        if (cpumask_equal(cpus, cpu_present_mask)) {
                flush->flags |= HV_FLUSH_ALL_PROCESSORS;
        } else {
                for_each_cpu(cpu, cpus) {
                        vcpu = hv_cpu_number_to_vp_number(cpu);
                        /*
                         * processor_mask can only name vCPUs 0-63.
                         * Interrupts were disabled above, so restore
                         * them before falling back to the native flush.
                         */
                        if (vcpu >= 64) {
                                local_irq_restore(flags);
                                goto do_native;
                        }

                        __set_bit(vcpu, (unsigned long *)
                                  &flush->processor_mask);
                }
        }

        /*
         * We can only flush up to max_gvas ranges with one hypercall.
         * Flush the whole address space if we were asked to do more.
         */
        max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]);
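        /*
         * sizeof(*flush) is 24 bytes (three u64s, the flexible gva_list
         * adds nothing), so with 4K pages this works out to
         * (4096 - 24) / 8 = 509 entries.
         */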

        if (info->end == TLB_FLUSH_ALL) {
                flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
                status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
                                         flush, NULL);
        } else if (info->end &&
                   ((info->end - info->start) / HV_TLB_FLUSH_UNIT) > max_gvas) {
                status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
                                         flush, NULL);
        } else {
                gva_n = fill_gva_list(flush->gva_list, 0,
                                      info->start, info->end);
                status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
                                             gva_n, 0, flush, NULL);
        }

        local_irq_restore(flags);

        if (!(status & HV_HYPERCALL_RESULT_MASK))
                return;
do_native:
        native_flush_tlb_others(cpus, info);
}

static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
                                       const struct flush_tlb_info *info)
{
        int nr_bank = 0, max_gvas, gva_n;
        struct hv_flush_pcpu_ex *flush;
        u64 status = U64_MAX;
        unsigned long flags;

        trace_hyperv_mmu_flush_tlb_others(cpus, info);

        if (!pcpu_flush_ex || !hv_hypercall_pg)
                goto do_native;

        if (cpumask_empty(cpus))
                return;

        local_irq_save(flags);

        flush = this_cpu_ptr(pcpu_flush_ex);

        if (info->mm) {
                flush->address_space = virt_to_phys(info->mm->pgd);
                flush->flags = 0;
        } else {
                flush->address_space = 0;
                flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
        }

        flush->hv_vp_set.valid_bank_mask = 0;

        if (!cpumask_equal(cpus, cpu_present_mask)) {
                flush->hv_vp_set.format = HV_GENERIC_SET_SPARCE_4K;
                nr_bank = cpumask_to_vp_set(flush, cpus);
        }

        if (!nr_bank) {
                flush->hv_vp_set.format = HV_GENERIC_SET_ALL;
                flush->flags |= HV_FLUSH_ALL_PROCESSORS;
        }

        /*
         * We can only flush up to max_gvas ranges with one hypercall.
         * Flush the whole address space if we were asked to do more.
         */
        max_gvas =
                (PAGE_SIZE - sizeof(*flush) - nr_bank *
                 sizeof(flush->hv_vp_set.bank_contents[0])) /
                sizeof(flush->gva_list[0]);
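        /*
         * Note that gva_list[] overlays memory right after the flexible
         * bank_contents[] array, which is why nr_bank entries are
         * subtracted here and why fill_gva_list() below starts writing
         * at offset nr_bank.
         */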
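
        /*
         * For the *_EX hypercalls the variable-size header is the
         * hv_vp_set: format and valid_bank_mask plus nr_bank
         * bank_contents entries, i.e. nr_bank + 2 eight-byte chunks,
         * passed below as the variable header size.
         */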
        if (info->end == TLB_FLUSH_ALL) {
                flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
                status = hv_do_rep_hypercall(
                        HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
                        0, nr_bank + 2, flush, NULL);
        } else if (info->end &&
                   ((info->end - info->start) / HV_TLB_FLUSH_UNIT) > max_gvas) {
                status = hv_do_rep_hypercall(
                        HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
                        0, nr_bank + 2, flush, NULL);
        } else {
                gva_n = fill_gva_list(flush->gva_list, nr_bank,
                                      info->start, info->end);
                status = hv_do_rep_hypercall(
                        HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
                        gva_n, nr_bank + 2, flush, NULL);
        }

        local_irq_restore(flags);

        if (!(status & HV_HYPERCALL_RESULT_MASK))
                return;
do_native:
        native_flush_tlb_others(cpus, info);
}

void hyperv_setup_mmu_ops(void)
{
        if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
                return;

        setup_clear_cpu_cap(X86_FEATURE_PCID);
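        /*
         * The flush hypercalls identify an address space by the CR3
         * value alone (see flush->address_space above), presumably the
         * reason PCID must stay disabled while they are in use.
         */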

        if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) {
                pr_info("Using hypercall for remote TLB flush\n");
                pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others;
        } else {
                pr_info("Using ext hypercall for remote TLB flush\n");
                pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others_ex;
        }
}

void hyper_alloc_mmu(void)
{
        if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
                return;

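        /*
         * Hypercall input is passed to the hypervisor by guest physical
         * address and must not cross a page boundary, hence one whole
         * page-aligned page per CPU.
         */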
        if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
                pcpu_flush = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
        else
                pcpu_flush_ex = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
}