#define pr_fmt(fmt)  "Hyper-V: " fmt

#include <linux/hyperv.h>
#include <linux/log2.h>
#include <linux/slab.h>
#include <linux/types.h>

#include <asm/fpu/api.h>
#include <asm/mshyperv.h>
#include <asm/msr.h>
#include <asm/tlbflush.h>

#define CREATE_TRACE_POINTS
#include <asm/trace/hyperv.h>

/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
struct hv_flush_pcpu {
        u64 address_space;
        u64 flags;
        u64 processor_mask;
        u64 gva_list[];
};

/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
struct hv_flush_pcpu_ex {
        u64 address_space;
        u64 flags;
        struct {
                u64 format;
                u64 valid_bank_mask;
                u64 bank_contents[];
        } hv_vp_set;
        u64 gva_list[];
};
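
/*
 * bank_contents[] and gva_list[] are both variable-length tails that share
 * the space after the fixed header, so the GVA list effectively begins
 * right after the last populated bank; callers index gva_list[] starting
 * at nr_bank.
 */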

/* Each gva in gva_list encodes up to 4096 pages to flush */
#define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE)

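/*
 * Per-CPU pointers to the hypercall input page for each flavour of the
 * flush hypercall.  The percpu arrays are set up by hyper_alloc_mmu();
 * the pages themselves are allocated lazily (GFP_ATOMIC) on first use.
 */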
static struct hv_flush_pcpu __percpu **pcpu_flush;

static struct hv_flush_pcpu_ex __percpu **pcpu_flush_ex;

/*
 * Fills in gva_list starting from offset. Returns the number of items added.
 */
static inline int fill_gva_list(u64 gva_list[], int offset,
                                unsigned long start, unsigned long end)
{
        int gva_n = offset;
        unsigned long cur = start, diff;

        do {
                diff = end > cur ? end - cur : 0;

                gva_list[gva_n] = cur & PAGE_MASK;
                /*
                 * Lower 12 bits encode the number of additional
                 * pages to flush (in addition to the 'cur' page).
                 */
                if (diff >= HV_TLB_FLUSH_UNIT)
                        gva_list[gva_n] |= ~PAGE_MASK;
                else if (diff)
                        gva_list[gva_n] |= (diff - 1) >> PAGE_SHIFT;

                cur += HV_TLB_FLUSH_UNIT;
                gva_n++;

        } while (cur < end);

        return gva_n - offset;
}

/* Return the number of banks in the resulting vp_set */
static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush,
                                    const struct cpumask *cpus)
{
        int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1;

        /* valid_bank_mask can represent up to 64 banks */
        if (hv_max_vp_index / 64 >= 64)
                return 0;

        /*
         * Clear all banks up to the maximum possible bank: hv_flush_pcpu_ex
         * structs are not cleared between calls, so we would risk flushing
         * unneeded vCPUs otherwise.
         */
        for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++)
                flush->hv_vp_set.bank_contents[vcpu_bank] = 0;

        /*
         * Some banks may end up being empty but this is acceptable.
         */
        for_each_cpu(cpu, cpus) {
                vcpu = hv_cpu_number_to_vp_number(cpu);
                vcpu_bank = vcpu / 64;
                vcpu_offset = vcpu % 64;
                __set_bit(vcpu_offset, (unsigned long *)
                          &flush->hv_vp_set.bank_contents[vcpu_bank]);
                if (vcpu_bank >= nr_bank)
                        nr_bank = vcpu_bank + 1;
        }
        flush->hv_vp_set.valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0);

        return nr_bank;
}

static void hyperv_flush_tlb_others(const struct cpumask *cpus,
                                    const struct flush_tlb_info *info)
{
        int cpu, vcpu, gva_n, max_gvas;
        struct hv_flush_pcpu **flush_pcpu;
        struct hv_flush_pcpu *flush;
        u64 status = U64_MAX;
        unsigned long flags;

        trace_hyperv_mmu_flush_tlb_others(cpus, info);

        if (!pcpu_flush || !hv_hypercall_pg)
                goto do_native;

        if (cpumask_empty(cpus))
                return;

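        /*
         * Disable interrupts so the per-CPU hypercall input page cannot be
         * reused by a flush issued from interrupt context while we are
         * still filling it in.
         */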
        local_irq_save(flags);

        flush_pcpu = this_cpu_ptr(pcpu_flush);

        if (unlikely(!*flush_pcpu)) {
                struct page *page = alloc_page(GFP_ATOMIC);

                if (page)
                        *flush_pcpu = page_address(page);
        }

        flush = *flush_pcpu;

        if (unlikely(!flush)) {
                local_irq_restore(flags);
                goto do_native;
        }

        if (info->mm) {
                flush->address_space = virt_to_phys(info->mm->pgd);
                flush->flags = 0;
        } else {
                flush->address_space = 0;
                flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
        }

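        /*
         * The HvFlushVirtualAddress{Space,List} hypercalls can only name
         * vCPUs 0..63 in processor_mask; fall back to the native flush if
         * any target vCPU is outside that range.
         */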
        flush->processor_mask = 0;
        if (cpumask_equal(cpus, cpu_present_mask)) {
                flush->flags |= HV_FLUSH_ALL_PROCESSORS;
        } else {
                for_each_cpu(cpu, cpus) {
                        vcpu = hv_cpu_number_to_vp_number(cpu);
                        if (vcpu >= 64) {
                                local_irq_restore(flags);
                                goto do_native;
                        }

                        __set_bit(vcpu, (unsigned long *)
                                  &flush->processor_mask);
                }
        }

        /*
         * We cannot flush more than max_gvas with one hypercall. Flush the
         * whole address space if we were asked to do more.
         */
        max_gvas = (PAGE_SIZE - sizeof(*flush)) / sizeof(flush->gva_list[0]);

        if (info->end == TLB_FLUSH_ALL) {
                flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
                status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
                                         flush, NULL);
        } else if (info->end &&
                   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
                status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE,
                                         flush, NULL);
        } else {
                gva_n = fill_gva_list(flush->gva_list, 0,
                                      info->start, info->end);
                status = hv_do_rep_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST,
                                             gva_n, 0, flush, NULL);
        }

        local_irq_restore(flags);

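        /*
         * The low 16 bits of the hypercall result hold the status code;
         * zero means success, anything else falls back to the native flush.
         */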
        if (!(status & HV_HYPERCALL_RESULT_MASK))
                return;
do_native:
        native_flush_tlb_others(cpus, info);
}

static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus,
                                       const struct flush_tlb_info *info)
{
        int nr_bank = 0, max_gvas, gva_n;
        struct hv_flush_pcpu_ex **flush_pcpu;
        struct hv_flush_pcpu_ex *flush;
        u64 status = U64_MAX;
        unsigned long flags;

        trace_hyperv_mmu_flush_tlb_others(cpus, info);

        if (!pcpu_flush_ex || !hv_hypercall_pg)
                goto do_native;

        if (cpumask_empty(cpus))
                return;

        local_irq_save(flags);

        flush_pcpu = this_cpu_ptr(pcpu_flush_ex);

        if (unlikely(!*flush_pcpu)) {
                struct page *page = alloc_page(GFP_ATOMIC);

                if (page)
                        *flush_pcpu = page_address(page);
        }

        flush = *flush_pcpu;

        if (unlikely(!flush)) {
                local_irq_restore(flags);
                goto do_native;
        }

        if (info->mm) {
                flush->address_space = virt_to_phys(info->mm->pgd);
                flush->flags = 0;
        } else {
                flush->address_space = 0;
                flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
        }

        flush->hv_vp_set.valid_bank_mask = 0;

        if (!cpumask_equal(cpus, cpu_present_mask)) {
                flush->hv_vp_set.format = HV_GENERIC_SET_SPARCE_4K;
                nr_bank = cpumask_to_vp_set(flush, cpus);
        }

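        /*
         * nr_bank stays 0 when all present CPUs are targeted or when
         * cpumask_to_vp_set() could not represent the mask; in both cases
         * simply ask the hypervisor to flush all processors.
         */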
        if (!nr_bank) {
                flush->hv_vp_set.format = HV_GENERIC_SET_ALL;
                flush->flags |= HV_FLUSH_ALL_PROCESSORS;
        }

        /*
         * We cannot flush more than max_gvas with one hypercall. Flush the
         * whole address space if we were asked to do more.
         */
        max_gvas =
                (PAGE_SIZE - sizeof(*flush) - nr_bank *
                 sizeof(flush->hv_vp_set.bank_contents[0])) /
                sizeof(flush->gva_list[0]);

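        /*
         * The populated VP-set banks form the variable-size header of the
         * Ex hypercalls (varhead_size == nr_bank), and the GVA list starts
         * right after them, which is why fill_gva_list() writes at offset
         * nr_bank.
         */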
        if (info->end == TLB_FLUSH_ALL) {
                flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY;
                status = hv_do_rep_hypercall(
                        HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
                        0, nr_bank, flush, NULL);
        } else if (info->end &&
                   ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) {
                status = hv_do_rep_hypercall(
                        HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
                        0, nr_bank, flush, NULL);
        } else {
                gva_n = fill_gva_list(flush->gva_list, nr_bank,
                                      info->start, info->end);
                status = hv_do_rep_hypercall(
                        HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX,
                        gva_n, nr_bank, flush, NULL);
        }

        local_irq_restore(flags);

        if (!(status & HV_HYPERCALL_RESULT_MASK))
                return;
do_native:
        native_flush_tlb_others(cpus, info);
}

void hyperv_setup_mmu_ops(void)
{
        if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
                return;

        setup_clear_cpu_cap(X86_FEATURE_PCID);

        if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) {
                pr_info("Using hypercall for remote TLB flush\n");
                pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others;
        } else {
                pr_info("Using ext hypercall for remote TLB flush\n");
                pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others_ex;
        }
}

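/*
 * Allocate the per-CPU pointer arrays that track the hypercall input
 * pages; the pages themselves are allocated on demand at flush time.
 */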
void hyper_alloc_mmu(void)
{
        if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED))
                return;

        if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
                pcpu_flush = alloc_percpu(struct hv_flush_pcpu *);
        else
                pcpu_flush_ex = alloc_percpu(struct hv_flush_pcpu_ex *);
}