arch/x86/kernel/ldt.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * Copyright (C) 1992 Krishna Balasubramanian and Linus Torvalds
   4  * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
   5  * Copyright (C) 2002 Andi Kleen
   6  *
   7  * This handles calls from both 32bit and 64bit mode.
   8  *
   9  * Lock order:
   10  *      context.ldt_usr_sem
  11  *        mmap_sem
  12  *          context.lock
  13  */
  14
  15 #include <linux/errno.h>
  16 #include <linux/gfp.h>
  17 #include <linux/sched.h>
  18 #include <linux/string.h>
  19 #include <linux/mm.h>
  20 #include <linux/smp.h>
  21 #include <linux/syscalls.h>
  22 #include <linux/slab.h>
  23 #include <linux/vmalloc.h>
  24 #include <linux/uaccess.h>
  25
  26 #include <asm/ldt.h>
  27 #include <asm/tlb.h>
  28 #include <asm/desc.h>
  29 #include <asm/mmu_context.h>
  30 #include <asm/syscalls.h>
  31
  32 static void refresh_ldt_segments(void)
  33 {
  34 #ifdef CONFIG_X86_64
  35         unsigned short sel;
  36
  37         /*
  38          * Make sure that the cached DS and ES descriptors match the updated
  39          * LDT.
  40          */
  41         savesegment(ds, sel);
  42         if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT)
  43                 loadsegment(ds, sel);
  44
  45         savesegment(es, sel);
  46         if ((sel & SEGMENT_TI_MASK) == SEGMENT_LDT)
  47                 loadsegment(es, sel);
  48 #endif
  49 }
  50
  51 /* context.lock is held by the task which issued the smp function call */
  52 static void flush_ldt(void *__mm)
  53 {
  54         struct mm_struct *mm = __mm;
  55
  56         if (this_cpu_read(cpu_tlbstate.loaded_mm) != mm)
  57                 return;
  58
  59         load_mm_ldt(mm);
  60
  61         refresh_ldt_segments();
  62 }
  63
  64 /* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
  65 static struct ldt_struct *alloc_ldt_struct(unsigned int num_entries)
  66 {
  67         struct ldt_struct *new_ldt;
  68         unsigned int alloc_size;
  69
  70         if (num_entries > LDT_ENTRIES)
  71                 return NULL;
  72
  73         new_ldt = kmalloc(sizeof(struct ldt_struct), GFP_KERNEL);
  74         if (!new_ldt)
  75                 return NULL;
  76
  77         BUILD_BUG_ON(LDT_ENTRY_SIZE != sizeof(struct desc_struct));
  78         alloc_size = num_entries * LDT_ENTRY_SIZE;
  79
  80         /*
  81          * Xen is very picky: it requires a page-aligned LDT that has no
  82          * trailing nonzero bytes in any page that contains LDT descriptors.
  83          * Keep it simple: zero the whole allocation and never allocate less
  84          * than PAGE_SIZE.
  85          */
  86         if (alloc_size > PAGE_SIZE)
  87                 new_ldt->entries = vzalloc(alloc_size);
  88         else
  89                 new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL);
  90
  91         if (!new_ldt->entries) {
  92                 kfree(new_ldt);
  93                 return NULL;
  94         }
  95
  96         /* The new LDT isn't aliased for PTI yet. */
  97         new_ldt->slot = -1;
  98
  99         new_ldt->nr_entries = num_entries;
 100         return new_ldt;
 101 }
 102
 103 #ifdef CONFIG_PAGE_TABLE_ISOLATION
 104
 105 static void do_sanity_check(struct mm_struct *mm,
 106                             bool had_kernel_mapping,
 107                             bool had_user_mapping)
 108 {
 109         if (mm->context.ldt) {
 110                 /*
 111                  * We already had an LDT.  The top-level entry should already
 112                  * have been allocated and synchronized with the usermode
 113                  * tables.
 114                  */
 115                 WARN_ON(!had_kernel_mapping);
 116                 if (boot_cpu_has(X86_FEATURE_PTI))
 117                         WARN_ON(!had_user_mapping);
 118         } else {
 119                 /*
 120                  * This is the first time we're mapping an LDT for this process.
 121                  * Sync the pgd to the usermode tables.
 122                  */
 123                 WARN_ON(had_kernel_mapping);
 124                 if (boot_cpu_has(X86_FEATURE_PTI))
 125                         WARN_ON(had_user_mapping);
 126         }
 127 }
 128
 129 #ifdef CONFIG_X86_PAE
 130
 131 static pmd_t *pgd_to_pmd_walk(pgd_t *pgd, unsigned long va)
 132 {
 133         p4d_t *p4d;
 134         pud_t *pud;
 135
 136         if (pgd->pgd == 0)
 137                 return NULL;
 138
 139         p4d = p4d_offset(pgd, va);
 140         if (p4d_none(*p4d))
 141                 return NULL;
 142
 143         pud = pud_offset(p4d, va);
 144         if (pud_none(*pud))
 145                 return NULL;
 146
 147         return pmd_offset(pud, va);
 148 }
 149
 150 static void map_ldt_struct_to_user(struct mm_struct *mm)
 151 {
 152         pgd_t *k_pgd = pgd_offset(mm, LDT_BASE_ADDR);
 153         pgd_t *u_pgd = kernel_to_user_pgdp(k_pgd);
 154         pmd_t *k_pmd, *u_pmd;
 155
 156         k_pmd = pgd_to_pmd_walk(k_pgd, LDT_BASE_ADDR);
 157         u_pmd = pgd_to_pmd_walk(u_pgd, LDT_BASE_ADDR);
 158
 159         if (boot_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
 160                 set_pmd(u_pmd, *k_pmd);
 161 }
 162
 163 static void sanity_check_ldt_mapping(struct mm_struct *mm)
 164 {
 165         pgd_t *k_pgd = pgd_offset(mm, LDT_BASE_ADDR);
 166         pgd_t *u_pgd = kernel_to_user_pgdp(k_pgd);
 167         bool had_kernel, had_user;
 168         pmd_t *k_pmd, *u_pmd;
 169
 170         k_pmd      = pgd_to_pmd_walk(k_pgd, LDT_BASE_ADDR);
 171         u_pmd      = pgd_to_pmd_walk(u_pgd, LDT_BASE_ADDR);
 172         had_kernel = (k_pmd->pmd != 0);
 173         had_user   = (u_pmd->pmd != 0);
 174
 175         do_sanity_check(mm, had_kernel, had_user);
 176 }
 177
 178 #else /* !CONFIG_X86_PAE */
 179
 180 static void map_ldt_struct_to_user(struct mm_struct *mm)
 181 {
 182         pgd_t *pgd = pgd_offset(mm, LDT_BASE_ADDR);
 183
 184         if (boot_cpu_has(X86_FEATURE_PTI) && !mm->context.ldt)
 185                 set_pgd(kernel_to_user_pgdp(pgd), *pgd);
 186 }
 187
 188 static void sanity_check_ldt_mapping(struct mm_struct *mm)
 189 {
 190         pgd_t *pgd = pgd_offset(mm, LDT_BASE_ADDR);
 191         bool had_kernel = (pgd->pgd != 0);
 192         bool had_user   = (kernel_to_user_pgdp(pgd)->pgd != 0);
 193
 194         do_sanity_check(mm, had_kernel, had_user);
 195 }
 196
 197 #endif /* CONFIG_X86_PAE */
 198
 199 /*
 200  * If PTI is enabled, this maps the LDT into the kernelmode and
 201  * usermode tables for the given mm.
 202  */
 203 static int
 204 map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
 205 {
 206         unsigned long va;
 207         bool is_vmalloc;
 208         spinlock_t *ptl;
 209         int i, nr_pages;
 210
 211         if (!boot_cpu_has(X86_FEATURE_PTI))
 212                 return 0;
 213
 214         /*
 215          * Any given ldt_struct should have map_ldt_struct() called at most
 216          * once.
 217          */
 218         WARN_ON(ldt->slot != -1);
 219
 220         /* Check if the current mappings are sane */
 221         sanity_check_ldt_mapping(mm);
 222
 223         is_vmalloc = is_vmalloc_addr(ldt->entries);
 224
 225         nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE);
 226
 227         for (i = 0; i < nr_pages; i++) {
 228                 unsigned long offset = i << PAGE_SHIFT;
 229                 const void *src = (char *)ldt->entries + offset;
 230                 unsigned long pfn;
 231                 pgprot_t pte_prot;
 232                 pte_t pte, *ptep;
 233
 234                 va = (unsigned long)ldt_slot_va(slot) + offset;
 235                 pfn = is_vmalloc ? vmalloc_to_pfn(src) :
 236                         page_to_pfn(virt_to_page(src));
 237                 /*
 238                  * Treat the PTI LDT range as a *userspace* range.
 239                  * get_locked_pte() will allocate all needed pagetables
 240                  * and account for them in this mm.
 241                  */
 242                 ptep = get_locked_pte(mm, va, &ptl);
 243                 if (!ptep)
 244                         return -ENOMEM;
 245                 /*
 246                  * Map it RO so the easy to find address is not a primary
 247                  * target via some kernel interface which misses a
 248                  * permission check.
 249                  */
 250                 pte_prot = __pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL);
 251                 /* Filter out unsuppored __PAGE_KERNEL* bits: */
 252                 pgprot_val(pte_prot) &= __supported_pte_mask;
 253                 pte = pfn_pte(pfn, pte_prot);
 254                 set_pte_at(mm, va, ptep, pte);
 255                 pte_unmap_unlock(ptep, ptl);
 256         }
 257
 258         /* Propagate LDT mapping to the user page-table */
 259         map_ldt_struct_to_user(mm);
 260
 261         ldt->slot = slot;
 262         return 0;
 263 }
 264
 265 static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt)
 266 {
 267         unsigned long va;
 268         int i, nr_pages;
 269
 270         if (!ldt)
 271                 return;
 272
 273         /* LDT map/unmap is only required for PTI */
 274         if (!boot_cpu_has(X86_FEATURE_PTI))
 275                 return;
 276
 277         nr_pages = DIV_ROUND_UP(ldt->nr_entries * LDT_ENTRY_SIZE, PAGE_SIZE);
 278
 279         for (i = 0; i < nr_pages; i++) {
 280                 unsigned long offset = i << PAGE_SHIFT;
 281                 spinlock_t *ptl;
 282                 pte_t *ptep;
 283
 284                 va = (unsigned long)ldt_slot_va(ldt->slot) + offset;
 285                 ptep = get_locked_pte(mm, va, &ptl);
 286                 pte_clear(mm, va, ptep);
 287                 pte_unmap_unlock(ptep, ptl);
 288         }
 289
 290         va = (unsigned long)ldt_slot_va(ldt->slot);
 291         flush_tlb_mm_range(mm, va, va + nr_pages * PAGE_SIZE, PAGE_SHIFT, false);
 292 }
 293
 294 #else /* !CONFIG_PAGE_TABLE_ISOLATION */
 295
 296 static int
 297 map_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt, int slot)
 298 {
 299         return 0;
 300 }
 301
 302 static void unmap_ldt_struct(struct mm_struct *mm, struct ldt_struct *ldt)
 303 {
 304 }
 305 #endif /* CONFIG_PAGE_TABLE_ISOLATION */
 306
 307 static void free_ldt_pgtables(struct mm_struct *mm)
 308 {
 309 #ifdef CONFIG_PAGE_TABLE_ISOLATION
 310         struct mmu_gather tlb;
 311         unsigned long start = LDT_BASE_ADDR;
 312         unsigned long end = LDT_END_ADDR;
 313
 314         if (!boot_cpu_has(X86_FEATURE_PTI))
 315                 return;
 316
 317         tlb_gather_mmu(&tlb, mm, start, end);
 318         free_pgd_range(&tlb, start, end, start, end);
 319         tlb_finish_mmu(&tlb, start, end);
 320 #endif
 321 }
 322
 323 /* After calling this, the LDT is immutable. */
 324 static void finalize_ldt_struct(struct ldt_struct *ldt)
 325 {
 326         paravirt_alloc_ldt(ldt->entries, ldt->nr_entries);
 327 }
 328
 329 static void install_ldt(struct mm_struct *mm, struct ldt_struct *ldt)
 330 {
 331         mutex_lock(&mm->context.lock);
 332
 333         /* Synchronizes with READ_ONCE in load_mm_ldt. */
 334         smp_store_release(&mm->context.ldt, ldt);
 335
 336         /* Activate the LDT for all CPUs using currents mm. */
 337         on_each_cpu_mask(mm_cpumask(mm), flush_ldt, mm, true);
 338
 339         mutex_unlock(&mm->context.lock);
 340 }
 341
 342 static void free_ldt_struct(struct ldt_struct *ldt)
 343 {
 344         if (likely(!ldt))
 345                 return;
 346
 347         paravirt_free_ldt(ldt->entries, ldt->nr_entries);
 348         if (ldt->nr_entries * LDT_ENTRY_SIZE > PAGE_SIZE)
 349                 vfree_atomic(ldt->entries);
 350         else
 351                 free_page((unsigned long)ldt->entries);
 352         kfree(ldt);
 353 }
 354
 355 /*
 356  * Called on fork from arch_dup_mmap(). Just copy the current LDT state,
 357  * the new task is not running, so nothing can be installed.
 358  */
 359 int ldt_dup_context(struct mm_struct *old_mm, struct mm_struct *mm)
 360 {
 361         struct ldt_struct *new_ldt;
 362         int retval = 0;
 363
 364         if (!old_mm)
 365                 return 0;
 366
 367         mutex_lock(&old_mm->context.lock);
 368         if (!old_mm->context.ldt)
 369                 goto out_unlock;
 370
 371         new_ldt = alloc_ldt_struct(old_mm->context.ldt->nr_entries);
 372         if (!new_ldt) {
 373                 retval = -ENOMEM;
 374                 goto out_unlock;
 375         }
 376
 377         memcpy(new_ldt->entries, old_mm->context.ldt->entries,
 378                new_ldt->nr_entries * LDT_ENTRY_SIZE);
 379         finalize_ldt_struct(new_ldt);
 380
 381         retval = map_ldt_struct(mm, new_ldt, 0);
 382         if (retval) {
 383                 free_ldt_pgtables(mm);
 384                 free_ldt_struct(new_ldt);
 385                 goto out_unlock;
 386         }
 387         mm->context.ldt = new_ldt;
 388
 389 out_unlock:
 390         mutex_unlock(&old_mm->context.lock);
 391         return retval;
 392 }
 393
 394 /*
 395  * No need to lock the MM as we are the last user
 396  *
 397  * 64bit: Don't touch the LDT register - we're already in the next thread.
 398  */
 399 void destroy_context_ldt(struct mm_struct *mm)
 400 {
 401         free_ldt_struct(mm->context.ldt);
 402         mm->context.ldt = NULL;
 403 }
 404
 405 void ldt_arch_exit_mmap(struct mm_struct *mm)
 406 {
 407         free_ldt_pgtables(mm);
 408 }
 409
 410 static int read_ldt(void __user *ptr, unsigned long bytecount)
 411 {
 412         struct mm_struct *mm = current->mm;
 413         unsigned long entries_size;
 414         int retval;
 415
 416         down_read(&mm->context.ldt_usr_sem);
 417
 418         if (!mm->context.ldt) {
 419                 retval = 0;
 420                 goto out_unlock;
 421         }
 422
 423         if (bytecount > LDT_ENTRY_SIZE * LDT_ENTRIES)
 424                 bytecount = LDT_ENTRY_SIZE * LDT_ENTRIES;
 425
 426         entries_size = mm->context.ldt->nr_entries * LDT_ENTRY_SIZE;
 427         if (entries_size > bytecount)
 428                 entries_size = bytecount;
 429
 430         if (copy_to_user(ptr, mm->context.ldt->entries, entries_size)) {
 431                 retval = -EFAULT;
 432                 goto out_unlock;
 433         }
 434
 435         if (entries_size != bytecount) {
 436                 /* Zero-fill the rest and pretend we read bytecount bytes. */
 437                 if (clear_user(ptr + entries_size, bytecount - entries_size)) {
 438                         retval = -EFAULT;
 439                         goto out_unlock;
 440                 }
 441         }
 442         retval = bytecount;
 443
 444 out_unlock:
 445         up_read(&mm->context.ldt_usr_sem);
 446         return retval;
 447 }
 448
 449 static int read_default_ldt(void __user *ptr, unsigned long bytecount)
 450 {
 451         /* CHECKME: Can we use _one_ random number ? */
 452 #ifdef CONFIG_X86_32
 453         unsigned long size = 5 * sizeof(struct desc_struct);
 454 #else
 455         unsigned long size = 128;
 456 #endif
 457         if (bytecount > size)
 458                 bytecount = size;
 459         if (clear_user(ptr, bytecount))
 460                 return -EFAULT;
 461         return bytecount;
 462 }
 463
 464 static int write_ldt(void __user *ptr, unsigned long bytecount, int oldmode)
 465 {
 466         struct mm_struct *mm = current->mm;
 467         struct ldt_struct *new_ldt, *old_ldt;
 468         unsigned int old_nr_entries, new_nr_entries;
 469         struct user_desc ldt_info;
 470         struct desc_struct ldt;
 471         int error;
 472
 473         error = -EINVAL;
 474         if (bytecount != sizeof(ldt_info))
 475                 goto out;
 476         error = -EFAULT;
 477         if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
 478                 goto out;
 479
 480         error = -EINVAL;
 481         if (ldt_info.entry_number >= LDT_ENTRIES)
 482                 goto out;
 483         if (ldt_info.contents == 3) {
 484                 if (oldmode)
 485                         goto out;
 486                 if (ldt_info.seg_not_present == 0)
 487                         goto out;
 488         }
 489
 490         if ((oldmode && !ldt_info.base_addr && !ldt_info.limit) ||
 491             LDT_empty(&ldt_info)) {
 492                 /* The user wants to clear the entry. */
 493                 memset(&ldt, 0, sizeof(ldt));
 494         } else {
 495                 if (!IS_ENABLED(CONFIG_X86_16BIT) && !ldt_info.seg_32bit) {
 496                         error = -EINVAL;
 497                         goto out;
 498                 }
 499
 500                 fill_ldt(&ldt, &ldt_info);
 501                 if (oldmode)
 502                         ldt.avl = 0;
 503         }
 504
 505         if (down_write_killable(&mm->context.ldt_usr_sem))
 506                 return -EINTR;
 507
 508         old_ldt       = mm->context.ldt;
 509         old_nr_entries = old_ldt ? old_ldt->nr_entries : 0;
 510         new_nr_entries = max(ldt_info.entry_number + 1, old_nr_entries);
 511
 512         error = -ENOMEM;
 513         new_ldt = alloc_ldt_struct(new_nr_entries);
 514         if (!new_ldt)
 515                 goto out_unlock;
 516
 517         if (old_ldt)
 518                 memcpy(new_ldt->entries, old_ldt->entries, old_nr_entries * LDT_ENTRY_SIZE);
 519
 520         new_ldt->entries[ldt_info.entry_number] = ldt;
 521         finalize_ldt_struct(new_ldt);
 522
 523         /*
 524          * If we are using PTI, map the new LDT into the userspace pagetables.
 525          * If there is already an LDT, use the other slot so that other CPUs
 526          * will continue to use the old LDT until install_ldt() switches
 527          * them over to the new LDT.
 528          */
 529         error = map_ldt_struct(mm, new_ldt, old_ldt ? !old_ldt->slot : 0);
 530         if (error) {
 531                 /*
 532                  * This only can fail for the first LDT setup. If an LDT is
 533                  * already installed then the PTE page is already
 534                  * populated. Mop up a half populated page table.
 535                  */
 536                 if (!WARN_ON_ONCE(old_ldt))
 537                         free_ldt_pgtables(mm);
 538                 free_ldt_struct(new_ldt);
 539                 goto out_unlock;
 540         }
 541
 542         install_ldt(mm, new_ldt);
 543         unmap_ldt_struct(mm, old_ldt);
 544         free_ldt_struct(old_ldt);
 545         error = 0;
 546
 547 out_unlock:
 548         up_write(&mm->context.ldt_usr_sem);
 549 out:
 550         return error;
 551 }
 552
 553 SYSCALL_DEFINE3(modify_ldt, int , func , void __user * , ptr ,
 554                 unsigned long , bytecount)
 555 {
 556         int ret = -ENOSYS;
 557
 558         switch (func) {
 559         case 0:
 560                 ret = read_ldt(ptr, bytecount);
 561                 break;
 562         case 1:
 563                 ret = write_ldt(ptr, bytecount, 1);
 564                 break;
 565         case 2:
 566                 ret = read_default_ldt(ptr, bytecount);
 567                 break;
 568         case 0x11:
 569                 ret = write_ldt(ptr, bytecount, 0);
 570                 break;
 571         }
 572         /*
 573          * The SYSCALL_DEFINE() macros give us an 'unsigned long'
 574          * return type, but tht ABI for sys_modify_ldt() expects
 575          * 'int'.  This cast gives us an int-sized value in %rax
 576          * for the return code.  The 'unsigned' is necessary so
 577          * the compiler does not try to sign-extend the negative
 578          * return codes into the high half of the register when
 579          * taking the value from int->long.
 580          */
 581         return (unsigned int)ret;
 582 }