Merge tag 'x86_sgx_for_v6.0-2022-08-03.1' of git://git.kernel.org/pub/scm/linux/kerne...

[sfrench/cifs-2.6.git] / arch / x86 / kernel / cpu / sgx / encl.c
diff --git a/arch/x86/kernel/cpu/sgx/encl.c b/arch/x86/kernel/cpu/sgx/encl.c

index 19876ebfb50447a2641b48aa211453471cb031f9..24c1bb8eb196252708444245e8b5b684c68d184a 100644 (file)
--- a/arch/x86/kernel/cpu/sgx/encl.c
+++ b/arch/x86/kernel/cpu/sgx/encl.c
@@ -232,25 +232,10 @@ static struct sgx_epc_page *sgx_encl_eldu(struct sgx_encl_page *encl_page,
         return epc_page;
  }
  
-static struct sgx_encl_page *sgx_encl_load_page(struct sgx_encl *encl,
-                                               unsigned long addr,
-                                               unsigned long vm_flags)
+static struct sgx_encl_page *__sgx_encl_load_page(struct sgx_encl *encl,
+                                                 struct sgx_encl_page *entry)
  {
-       unsigned long vm_prot_bits = vm_flags & (VM_READ | VM_WRITE | VM_EXEC);
         struct sgx_epc_page *epc_page;
-       struct sgx_encl_page *entry;
-
-       entry = xa_load(&encl->page_array, PFN_DOWN(addr));
-       if (!entry)
-               return ERR_PTR(-EFAULT);
-
-       /*
-        * Verify that the faulted page has equal or higher build time
-        * permissions than the VMA permissions (i.e. the subset of {VM_READ,
-        * VM_WRITE, VM_EXECUTE} in vma->vm_flags).
-        */
-       if ((entry->vm_max_prot_bits & vm_prot_bits) != vm_prot_bits)
-               return ERR_PTR(-EFAULT);
  
         /* Entry successfully located. */
         if (entry->epc_page) {
@@ -276,6 +261,146 @@ static struct sgx_encl_page *sgx_encl_load_page(struct sgx_encl *encl,
         return entry;
  }
  
+static struct sgx_encl_page *sgx_encl_load_page_in_vma(struct sgx_encl *encl,
+                                                      unsigned long addr,
+                                                      unsigned long vm_flags)
+{
+       unsigned long vm_prot_bits = vm_flags & (VM_READ | VM_WRITE | VM_EXEC);
+       struct sgx_encl_page *entry;
+
+       entry = xa_load(&encl->page_array, PFN_DOWN(addr));
+       if (!entry)
+               return ERR_PTR(-EFAULT);
+
+       /*
+        * Fail with -EFAULT unless every permission requested through
+        * vm_flags (its VM_READ, VM_WRITE and VM_EXEC bits) is also set in
+        * the page's vm_max_prot_bits.
+        */
+       if ((entry->vm_max_prot_bits & vm_prot_bits) != vm_prot_bits)
+               return ERR_PTR(-EFAULT);
+
+       return __sgx_encl_load_page(encl, entry);
+}
+
+struct sgx_encl_page *sgx_encl_load_page(struct sgx_encl *encl,
+                                        unsigned long addr)
+{
+       struct sgx_encl_page *entry;
+
+       entry = xa_load(&encl->page_array, PFN_DOWN(addr));
+       if (!entry)
+               return ERR_PTR(-EFAULT); /* no page_array entry for addr */
+
+       return __sgx_encl_load_page(encl, entry); /* no vm_flags check */
+}
+
+/**
+ * sgx_encl_eaug_page() - Dynamically add page to initialized enclave
+ * @vma:       VMA obtained from fault info from where page is accessed
+ * @encl:      enclave accessing the page
+ * @addr:      address that triggered the page fault
+ *
+ * When an initialized enclave accesses a page with no backing EPC page
+ * on an SGX2 system, an EPC page can be added dynamically via the SGX2
+ * ENCLS[EAUG] instruction.
+ *
+ * Returns: VM_FAULT_NOPAGE when the PTE was installed or EPC allocation
+ * returned -EBUSY, VM_FAULT_OOM or VM_FAULT_SIGBUS on other failures.
+ */
+static vm_fault_t sgx_encl_eaug_page(struct vm_area_struct *vma,
+                                    struct sgx_encl *encl, unsigned long addr)
+{
+       vm_fault_t vmret = VM_FAULT_SIGBUS;
+       struct sgx_pageinfo pginfo = {0};
+       struct sgx_encl_page *encl_page;
+       struct sgx_epc_page *epc_page;
+       struct sgx_va_page *va_page;
+       unsigned long phys_addr;
+       u64 secinfo_flags;
+       int ret;
+
+       if (!test_bit(SGX_ENCL_INITIALIZED, &encl->flags))
+               return VM_FAULT_SIGBUS;
+
+       /*
+        * Ignore internal permission checking for dynamically added pages.
+        * They matter only for data added during the pre-initialization
+        * phase. The enclave decides the permissions by the means of
+        * EACCEPT, EACCEPTCOPY and EMODPE.
+        */
+       secinfo_flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_X;
+       encl_page = sgx_encl_page_alloc(encl, addr - encl->base, secinfo_flags);
+       if (IS_ERR(encl_page))
+               return VM_FAULT_OOM;
+
+       mutex_lock(&encl->lock);
+
+       epc_page = sgx_alloc_epc_page(encl_page, false);
+       if (IS_ERR(epc_page)) {
+               if (PTR_ERR(epc_page) == -EBUSY)
+                       vmret =  VM_FAULT_NOPAGE;
+               goto err_out_unlock;
+       }
+
+       va_page = sgx_encl_grow(encl, false);
+       if (IS_ERR(va_page))
+               goto err_out_epc;
+
+       if (va_page)
+               list_add(&va_page->list, &encl->va_pages);
+
+       ret = xa_insert(&encl->page_array, PFN_DOWN(encl_page->desc),
+                       encl_page, GFP_KERNEL);
+       /*
+        * If ret == -EBUSY then the page was created in another flow while
+        * this one was running without encl->lock.
+        */
+       if (ret)
+               goto err_out_shrink;
+
+       pginfo.secs = (unsigned long)sgx_get_epc_virt_addr(encl->secs.epc_page);
+       pginfo.addr = encl_page->desc & PAGE_MASK;
+       pginfo.metadata = 0;
+
+       ret = __eaug(&pginfo, sgx_get_epc_virt_addr(epc_page));
+       if (ret)
+               goto err_out;
+
+       encl_page->encl = encl;
+       encl_page->epc_page = epc_page;
+       encl_page->type = SGX_PAGE_TYPE_REG;
+       encl->secs_child_cnt++;
+
+       sgx_mark_page_reclaimable(encl_page->epc_page);
+
+       phys_addr = sgx_get_epc_phys_addr(epc_page);
+       /*
+        * Do not undo everything when creating the PTE fails - the next #PF
+        * will find the page ready for a PTE.
+        */
+       vmret = vmf_insert_pfn(vma, addr, PFN_DOWN(phys_addr));
+       if (vmret != VM_FAULT_NOPAGE) {
+               mutex_unlock(&encl->lock);
+               return VM_FAULT_SIGBUS;
+       }
+       mutex_unlock(&encl->lock);
+       return VM_FAULT_NOPAGE;
+
+err_out:
+       xa_erase(&encl->page_array, PFN_DOWN(encl_page->desc));
+
+err_out_shrink:
+       sgx_encl_shrink(encl, va_page);
+err_out_epc:
+       sgx_encl_free_epc_page(epc_page);
+err_out_unlock:
+       mutex_unlock(&encl->lock);
+       kfree(encl_page);
+
+       return vmret;
+}
+
  static vm_fault_t sgx_vma_fault(struct vm_fault *vmf)
  {
         unsigned long addr = (unsigned long)vmf->address;
@@ -295,9 +420,20 @@ static vm_fault_t sgx_vma_fault(struct vm_fault *vmf)
         if (unlikely(!encl))
                 return VM_FAULT_SIGBUS;
  
+       /*
+        * The page_array keeps track of all enclave pages, whether they
+        * are swapped out or not. If there is no entry for this page and
+        * the system supports SGX2 then it is possible to dynamically add
+        * a new enclave page. This is only possible for an initialized
+        * enclave, which sgx_encl_eaug_page() checks for right away.
+        */
+       if (cpu_feature_enabled(X86_FEATURE_SGX2) &&
+           (!xa_load(&encl->page_array, PFN_DOWN(addr))))
+               return sgx_encl_eaug_page(vma, encl, addr);
+
         mutex_lock(&encl->lock);
  
-       entry = sgx_encl_load_page(encl, addr, vma->vm_flags);
+       entry = sgx_encl_load_page_in_vma(encl, addr, vma->vm_flags);
         if (IS_ERR(entry)) {
                 mutex_unlock(&encl->lock);
  
@@ -367,6 +503,11 @@ int sgx_encl_may_map(struct sgx_encl *encl, unsigned long start,
  
         XA_STATE(xas, &encl->page_array, PFN_DOWN(start));
  
+       /* Disallow mapping outside enclave's address range. */
+       if (test_bit(SGX_ENCL_INITIALIZED, &encl->flags) &&
+           (start < encl->base || end > encl->base + encl->size))
+               return -EACCES;
+
         /*
          * Disallow READ_IMPLIES_EXEC tasks as their VMA permissions might
          * conflict with the enclave page permissions.
@@ -445,7 +586,7 @@ static struct sgx_encl_page *sgx_encl_reserve_page(struct sgx_encl *encl,
         for ( ; ; ) {
                 mutex_lock(&encl->lock);
  
-               entry = sgx_encl_load_page(encl, addr, vm_flags);
+               entry = sgx_encl_load_page_in_vma(encl, addr, vm_flags);
                 if (PTR_ERR(entry) != -EBUSY)
                         break;
  
@@ -687,7 +828,7 @@ int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm)
  
         spin_lock(&encl->mm_lock);
         list_add_rcu(&encl_mm->list, &encl->mm_list);
-       /* Pairs with smp_rmb() in sgx_reclaimer_block(). */
+       /* Pairs with smp_rmb() in sgx_zap_enclave_ptes(). */
         smp_wmb();
         encl->mm_list_version++;
         spin_unlock(&encl->mm_lock);
@@ -695,6 +836,73 @@ int sgx_encl_mm_add(struct sgx_encl *encl, struct mm_struct *mm)
         return 0;
  }
  
+/**
+ * sgx_encl_cpumask() - Query which CPUs might be accessing the enclave
+ * @encl: the enclave
+ *
+ * Some SGX functions require that no cached linear-to-physical address
+ * mappings are present before they can succeed. For example, ENCLS[EWB]
+ * copies a page from the enclave page cache to regular main memory but
+ * it fails if it cannot ensure that there are no cached
+ * linear-to-physical address mappings referring to the page.
+ *
+ * SGX hardware flushes all cached linear-to-physical mappings on a CPU
+ * when an enclave is exited via ENCLU[EEXIT] or an Asynchronous Enclave
+ * Exit (AEX). Exiting an enclave will thus ensure cached linear-to-physical
+ * address mappings are cleared but coordination with the tracking done within
+ * the SGX hardware is needed to support the SGX functions that depend on this
+ * cache clearing.
+ *
+ * When the ENCLS[ETRACK] function is issued on an enclave the hardware
+ * tracks threads operating inside the enclave at that time. The SGX
+ * hardware tracking requires that all the identified threads have
+ * exited the enclave in order to flush the mappings before a function such
+ * as ENCLS[EWB] will be permitted.
+ *
+ * The following flow is used to support SGX functions that require that
+ * no cached linear-to-physical address mappings are present:
+ * 1) Execute ENCLS[ETRACK] to initiate hardware tracking.
+ * 2) Use this function (sgx_encl_cpumask()) to query which CPUs might be
+ *    accessing the enclave.
+ * 3) Send IPI to identified CPUs, kicking them out of the enclave and
+ *    thus flushing all locally cached linear-to-physical address mappings.
+ * 4) Execute SGX function.
+ *
+ * Context: It is required to call this function after ENCLS[ETRACK].
+ *          This will ensure that if any new mm appears (racing with
+ *          sgx_encl_mm_add()) then the new mm will enter into the
+ *          enclave with fresh linear-to-physical address mappings.
+ *
+ *          It is required that all IPIs are completed before a new
+ *          ENCLS[ETRACK] is issued so be sure to protect steps 1 to 3
+ *          of the above flow with the enclave's mutex.
+ *
+ * Return: cpumask of CPUs that might be accessing @encl
+ */
+const cpumask_t *sgx_encl_cpumask(struct sgx_encl *encl)
+{
+       cpumask_t *cpumask = &encl->cpumask;
+       struct sgx_encl_mm *encl_mm;
+       int idx;
+
+       cpumask_clear(cpumask);
+
+       idx = srcu_read_lock(&encl->srcu);
+
+       list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
+               if (!mmget_not_zero(encl_mm->mm))
+                       continue;
+
+               cpumask_or(cpumask, cpumask, mm_cpumask(encl_mm->mm));
+
+               mmput_async(encl_mm->mm);
+       }
+
+       srcu_read_unlock(&encl->srcu, idx);
+
+       return cpumask;
+}
+
  static struct page *sgx_encl_get_backing_page(struct sgx_encl *encl,
                                               pgoff_t index)
  {
@@ -735,7 +943,6 @@ static int sgx_encl_get_backing(struct sgx_encl *encl, unsigned long page_index,
                 return PTR_ERR(pcmd);
         }
  
-       backing->page_index = page_index;
         backing->contents = contents;
         backing->pcmd = pcmd;
         backing->pcmd_offset = page_pcmd_off & (PAGE_SIZE - 1);
@@ -902,8 +1109,85 @@ int sgx_encl_test_and_clear_young(struct mm_struct *mm,
         return ret;
  }
  
+struct sgx_encl_page *sgx_encl_page_alloc(struct sgx_encl *encl,
+                                         unsigned long offset,
+                                         u64 secinfo_flags)
+{
+       struct sgx_encl_page *encl_page;
+       unsigned long prot;
+
+       encl_page = kzalloc(sizeof(*encl_page), GFP_KERNEL);
+       if (!encl_page)
+               return ERR_PTR(-ENOMEM);
+
+       encl_page->desc = encl->base + offset;
+       encl_page->encl = encl;
+
+       prot = _calc_vm_trans(secinfo_flags, SGX_SECINFO_R, PROT_READ)  |
+              _calc_vm_trans(secinfo_flags, SGX_SECINFO_W, PROT_WRITE) |
+              _calc_vm_trans(secinfo_flags, SGX_SECINFO_X, PROT_EXEC);
+
+       /*
+        * TCS pages must always have RW set for CPU access while the SECINFO
+        * permissions are *always* zero - the CPU ignores the user provided
+        * values and silently overwrites them with zero permissions.
+        */
+       if ((secinfo_flags & SGX_SECINFO_PAGE_TYPE_MASK) == SGX_SECINFO_TCS)
+               prot |= PROT_READ | PROT_WRITE;
+
+       /* Calculate maximum of the VM flags for the page. */
+       encl_page->vm_max_prot_bits = calc_vm_prot_bits(prot, 0);
+
+       return encl_page;
+}
+
+/**
+ * sgx_zap_enclave_ptes() - remove PTEs mapping the address from enclave
+ * @encl: the enclave
+ * @addr: page aligned pointer to single page for which PTEs will be removed
+ *
+ * Multiple VMAs may have an enclave page mapped. Remove the PTE mapping
+ * @addr from each VMA. Ensure that the page fault handler is ready to handle
+ * new mappings of @addr before calling this function.
+ */
+void sgx_zap_enclave_ptes(struct sgx_encl *encl, unsigned long addr)
+{
+       unsigned long mm_list_version;
+       struct sgx_encl_mm *encl_mm;
+       struct vm_area_struct *vma;
+       int idx, ret;
+
+       do {
+               mm_list_version = encl->mm_list_version;
+
+               /* Pairs with smp_wmb() in sgx_encl_mm_add(). */
+               smp_rmb();
+
+               idx = srcu_read_lock(&encl->srcu);
+
+               list_for_each_entry_rcu(encl_mm, &encl->mm_list, list) {
+                       if (!mmget_not_zero(encl_mm->mm))
+                               continue;
+
+                       mmap_read_lock(encl_mm->mm);
+
+                       ret = sgx_encl_find(encl_mm->mm, addr, &vma);
+                       if (!ret && encl == vma->vm_private_data)
+                               zap_vma_ptes(vma, addr, PAGE_SIZE);
+
+                       mmap_read_unlock(encl_mm->mm);
+
+                       mmput_async(encl_mm->mm);
+               }
+
+               srcu_read_unlock(&encl->srcu, idx);
+       } while (unlikely(encl->mm_list_version != mm_list_version));
+}
+
  /**
   * sgx_alloc_va_page() - Allocate a Version Array (VA) page
+ * @reclaim: Reclaim EPC pages directly if none available. Enclave
+ *           mutex should not be held if this is set.
   *
   * Allocate a free EPC page and convert it to a Version Array (VA) page.
   *
@@ -911,12 +1195,12 @@ int sgx_encl_test_and_clear_young(struct mm_struct *mm,
   *   a VA page,
   *   -errno otherwise
   */
-struct sgx_epc_page *sgx_alloc_va_page(void)
+struct sgx_epc_page *sgx_alloc_va_page(bool reclaim)
  {
         struct sgx_epc_page *epc_page;
         int ret;
  
-       epc_page = sgx_alloc_epc_page(NULL, true);
+       epc_page = sgx_alloc_epc_page(NULL, reclaim);
         if (IS_ERR(epc_page))
                 return ERR_CAST(epc_page);