Merge branches 'work.misc' and 'work.dcache' of git://git.kernel.org/pub/scm/linux...
[sfrench/cifs-2.6.git] / arch / x86 / kernel / cpu / intel_rdt_rdtgroup.c
index cde0f4114d4ed44ef18d38e57aa6ec3f03319d15..d6d7ea7349d016bf479ca9024889c448fadbacf7 100644 (file)
@@ -22,6 +22,7 @@
 
 #include <linux/cacheinfo.h>
 #include <linux/cpu.h>
+#include <linux/debugfs.h>
 #include <linux/fs.h>
 #include <linux/sysfs.h>
 #include <linux/kernfs.h>
@@ -56,6 +57,8 @@ static struct kernfs_node *kn_mondata;
 static struct seq_buf last_cmd_status;
 static char last_cmd_status_buf[512];
 
+struct dentry *debugfs_resctrl;
+
 void rdt_last_cmd_clear(void)
 {
        lockdep_assert_held(&rdtgroup_mutex);
@@ -161,8 +164,10 @@ enum rdtgrp_mode rdtgroup_mode_by_closid(int closid)
 }
 
 static const char * const rdt_mode_str[] = {
-       [RDT_MODE_SHAREABLE]    = "shareable",
-       [RDT_MODE_EXCLUSIVE]    = "exclusive",
+       [RDT_MODE_SHAREABLE]            = "shareable",
+       [RDT_MODE_EXCLUSIVE]            = "exclusive",
+       [RDT_MODE_PSEUDO_LOCKSETUP]     = "pseudo-locksetup",
+       [RDT_MODE_PSEUDO_LOCKED]        = "pseudo-locked",
 };
 
 /**
@@ -260,8 +265,12 @@ static int rdtgroup_cpus_show(struct kernfs_open_file *of,
        rdtgrp = rdtgroup_kn_lock_live(of->kn);
 
        if (rdtgrp) {
-               seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
-                          cpumask_pr_args(&rdtgrp->cpu_mask));
+               if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
+                       seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
+                                  cpumask_pr_args(&rdtgrp->plr->d->cpu_mask));
+               else
+                       seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
+                                  cpumask_pr_args(&rdtgrp->cpu_mask));
        } else {
                ret = -ENOENT;
        }
@@ -447,6 +456,13 @@ static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
                goto unlock;
        }
 
+       if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
+           rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
+               ret = -EINVAL;
+               rdt_last_cmd_puts("pseudo-locking in progress\n");
+               goto unlock;
+       }
+
        if (is_cpu_list(of))
                ret = cpulist_parse(buf, newmask);
        else
@@ -562,6 +578,32 @@ static int __rdtgroup_move_task(struct task_struct *tsk,
        return ret;
 }
 
+/**
+ * rdtgroup_tasks_assigned - Test if tasks have been assigned to resource group
+ * @r: Resource group
+ *
+ * Iterates over threads with for_each_process_thread() inside an RCU
+ * read-side critical section. A thread counts as assigned when @r is a
+ * control group (RDTCTRL_GROUP) and the thread's closid equals @r's closid,
+ * or when @r is a monitor group (RDTMON_GROUP) and the thread's rmid equals
+ * @r's mon.rmid.
+ *
+ * NOTE(review): if for_each_process_thread() expands to nested loops, the
+ * break below only leaves the innermost one. The result is unaffected
+ * because ret is already 1 at that point - confirm against the macro.
+ *
+ * Context: rdtgroup_mutex must be held (checked by lockdep_assert_held()).
+ *
+ * Return: 1 if tasks have been assigned to @r, 0 otherwise
+ */
+int rdtgroup_tasks_assigned(struct rdtgroup *r)
+{
+       struct task_struct *p, *t;
+       int ret = 0;
+
+       lockdep_assert_held(&rdtgroup_mutex);
+
+       rcu_read_lock();
+       for_each_process_thread(p, t) {
+               if ((r->type == RDTCTRL_GROUP && t->closid == r->closid) ||
+                   (r->type == RDTMON_GROUP && t->rmid == r->mon.rmid)) {
+                       ret = 1;
+                       break;
+               }
+       }
+       rcu_read_unlock();
+
+       return ret;
+}
+
 static int rdtgroup_task_write_permission(struct task_struct *task,
                                          struct kernfs_open_file *of)
 {
@@ -623,13 +665,22 @@ static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of,
        if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
                return -EINVAL;
        rdtgrp = rdtgroup_kn_lock_live(of->kn);
+       if (!rdtgrp) {
+               rdtgroup_kn_unlock(of->kn);
+               return -ENOENT;
+       }
        rdt_last_cmd_clear();
 
-       if (rdtgrp)
-               ret = rdtgroup_move_task(pid, rdtgrp, of);
-       else
-               ret = -ENOENT;
+       if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
+           rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
+               ret = -EINVAL;
+               rdt_last_cmd_puts("pseudo-locking in progress\n");
+               goto unlock;
+       }
+
+       ret = rdtgroup_move_task(pid, rdtgrp, of);
 
+unlock:
        rdtgroup_kn_unlock(of->kn);
 
        return ret ?: nbytes;
@@ -727,14 +778,16 @@ static int rdt_shareable_bits_show(struct kernfs_open_file *of,
  *   H - currently used by hardware only but available for software use
  *   S - currently used and shareable by software only
  *   E - currently used exclusively by one resource group
+ *   P - currently pseudo-locked by one resource group
  */
 static int rdt_bit_usage_show(struct kernfs_open_file *of,
                              struct seq_file *seq, void *v)
 {
        struct rdt_resource *r = of->kn->parent->priv;
-       u32 sw_shareable, hw_shareable, exclusive;
+       u32 sw_shareable = 0, hw_shareable = 0;
+       u32 exclusive = 0, pseudo_locked = 0;
        struct rdt_domain *dom;
-       int i, hwb, swb, excl;
+       int i, hwb, swb, excl, psl;
        enum rdtgrp_mode mode;
        bool sep = false;
        u32 *ctrl;
@@ -759,6 +812,16 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
                        case RDT_MODE_EXCLUSIVE:
                                exclusive |= *ctrl;
                                break;
+                       case RDT_MODE_PSEUDO_LOCKSETUP:
+                       /*
+                        * RDT_MODE_PSEUDO_LOCKSETUP is possible
+                        * here but not included since the CBM
+                        * associated with this CLOSID in this mode
+                        * is not initialized and no task or cpu can be
+                        * assigned this CLOSID.
+                        */
+                               break;
+                       case RDT_MODE_PSEUDO_LOCKED:
                        case RDT_NUM_MODES:
                                WARN(1,
                                     "invalid mode for closid %d\n", i);
@@ -766,9 +829,11 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
                        }
                }
                for (i = r->cache.cbm_len - 1; i >= 0; i--) {
+                       pseudo_locked = dom->plr ? dom->plr->cbm : 0;
                        hwb = test_bit(i, (unsigned long *)&hw_shareable);
                        swb = test_bit(i, (unsigned long *)&sw_shareable);
                        excl = test_bit(i, (unsigned long *)&exclusive);
+                       psl = test_bit(i, (unsigned long *)&pseudo_locked);
                        if (hwb && swb)
                                seq_putc(seq, 'X');
                        else if (hwb && !swb)
@@ -777,6 +842,8 @@ static int rdt_bit_usage_show(struct kernfs_open_file *of,
                                seq_putc(seq, 'S');
                        else if (excl)
                                seq_putc(seq, 'E');
+                       else if (psl)
+                               seq_putc(seq, 'P');
                        else /* Unused bits remain */
                                seq_putc(seq, '0');
                }
@@ -923,9 +990,10 @@ bool rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
        ctrl = d->ctrl_val;
        for (i = 0; i < r->num_closid; i++, ctrl++) {
                ctrl_b = (unsigned long *)ctrl;
-               if (closid_allocated(i) && i != closid) {
+               mode = rdtgroup_mode_by_closid(i);
+               if (closid_allocated(i) && i != closid &&
+                   mode != RDT_MODE_PSEUDO_LOCKSETUP) {
                        if (bitmap_intersects(cbm, ctrl_b, r->cache.cbm_len)) {
-                               mode = rdtgroup_mode_by_closid(i);
                                if (exclusive) {
                                        if (mode == RDT_MODE_EXCLUSIVE)
                                                return true;
@@ -995,10 +1063,24 @@ static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
        mode = rdtgrp->mode;
 
        if ((!strcmp(buf, "shareable") && mode == RDT_MODE_SHAREABLE) ||
-           (!strcmp(buf, "exclusive") && mode == RDT_MODE_EXCLUSIVE))
+           (!strcmp(buf, "exclusive") && mode == RDT_MODE_EXCLUSIVE) ||
+           (!strcmp(buf, "pseudo-locksetup") &&
+            mode == RDT_MODE_PSEUDO_LOCKSETUP) ||
+           (!strcmp(buf, "pseudo-locked") && mode == RDT_MODE_PSEUDO_LOCKED))
                goto out;
 
+       if (mode == RDT_MODE_PSEUDO_LOCKED) {
+               rdt_last_cmd_printf("cannot change pseudo-locked group\n");
+               ret = -EINVAL;
+               goto out;
+       }
+
        if (!strcmp(buf, "shareable")) {
+               if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
+                       ret = rdtgroup_locksetup_exit(rdtgrp);
+                       if (ret)
+                               goto out;
+               }
                rdtgrp->mode = RDT_MODE_SHAREABLE;
        } else if (!strcmp(buf, "exclusive")) {
                if (!rdtgroup_mode_test_exclusive(rdtgrp)) {
@@ -1006,7 +1088,17 @@ static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
                        ret = -EINVAL;
                        goto out;
                }
+               if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
+                       ret = rdtgroup_locksetup_exit(rdtgrp);
+                       if (ret)
+                               goto out;
+               }
                rdtgrp->mode = RDT_MODE_EXCLUSIVE;
+       } else if (!strcmp(buf, "pseudo-locksetup")) {
+               ret = rdtgroup_locksetup_enter(rdtgrp);
+               if (ret)
+                       goto out;
+               rdtgrp->mode = RDT_MODE_PSEUDO_LOCKSETUP;
        } else {
                rdt_last_cmd_printf("unknown/unsupported mode\n");
                ret = -EINVAL;
@@ -1071,19 +1163,33 @@ static int rdtgroup_size_show(struct kernfs_open_file *of,
                return -ENOENT;
        }
 
+       if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
+               seq_printf(s, "%*s:", max_name_width, rdtgrp->plr->r->name);
+               size = rdtgroup_cbm_to_size(rdtgrp->plr->r,
+                                           rdtgrp->plr->d,
+                                           rdtgrp->plr->cbm);
+               seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size);
+               goto out;
+       }
+
        for_each_alloc_enabled_rdt_resource(r) {
                seq_printf(s, "%*s:", max_name_width, r->name);
                list_for_each_entry(d, &r->domains, list) {
                        if (sep)
                                seq_putc(s, ';');
-                       cbm = d->ctrl_val[rdtgrp->closid];
-                       size = rdtgroup_cbm_to_size(r, d, cbm);
+                       if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
+                               size = 0;
+                       } else {
+                               cbm = d->ctrl_val[rdtgrp->closid];
+                               size = rdtgroup_cbm_to_size(r, d, cbm);
+                       }
                        seq_printf(s, "%d=%u", d->id, size);
                        sep = true;
                }
                seq_putc(s, '\n');
        }
 
+out:
        rdtgroup_kn_unlock(of->kn);
 
        return 0;
@@ -1255,6 +1361,103 @@ error:
        return ret;
 }
 
+/**
+ * rdtgroup_kn_mode_restrict - Restrict user access to named resctrl file
+ * @r: The resource group with which the file is associated.
+ * @name: Name of the file
+ *
+ * The permissions of named resctrl file, directory, or link are modified
+ * to not allow read, write, or execute by any user.
+ *
+ * @name is looked up below @r->kn. The new mode consists solely of the
+ * file-type bit matching the node's kernfs type (S_IFDIR, S_IFREG or
+ * S_IFLNK), i.e. no permission bits are set, and it is applied with
+ * kernfs_setattr(). The node obtained by the lookup is released with
+ * kernfs_put() before returning.
+ *
+ * WARNING: This function is intended to communicate to the user that the
+ * resctrl file has been locked down - that it is not relevant to the
+ * particular state the system finds itself in. It should not be relied
+ * on to protect from user access because after the file's permissions
+ * are restricted the user can still change the permissions using chmod
+ * from the command line.
+ *
+ * Return: 0 on success, <0 on failure: -ENOENT if @name is not found below
+ * @r->kn, otherwise whatever kernfs_setattr() returns.
+ */
+int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name)
+{
+       struct iattr iattr = {.ia_valid = ATTR_MODE,};
+       struct kernfs_node *kn;
+       int ret = 0;
+
+       kn = kernfs_find_and_get_ns(r->kn, name, NULL);
+       if (!kn)
+               return -ENOENT;
+
+       switch (kernfs_type(kn)) {
+       case KERNFS_DIR:
+               iattr.ia_mode = S_IFDIR;
+               break;
+       case KERNFS_FILE:
+               iattr.ia_mode = S_IFREG;
+               break;
+       case KERNFS_LINK:
+               iattr.ia_mode = S_IFLNK;
+               break;
+       }
+
+       ret = kernfs_setattr(kn, &iattr);
+       kernfs_put(kn);
+       return ret;
+}
+
+/**
+ * rdtgroup_kn_mode_restore - Restore user access to named resctrl file
+ * @r: The resource group with which the file is associated.
+ * @name: Name of the file
+ * @mask: Mask of permissions that should be restored
+ *
+ * Restore the permissions of the named file. If @name is a directory the
+ * permissions of its parent will be used.
+ *
+ * The base permissions come from the res_common_files entry whose name
+ * equals @name, ANDed with @mask. The search does not stop at the first
+ * match, so the last matching entry wins; if nothing matches the base is 0.
+ * For a directory the parent's mode is additionally ORed in (it is not
+ * filtered by @mask). Finally the file-type bit for the node's kernfs type
+ * is ORed in and the result is applied with kernfs_setattr().
+ *
+ * Return: 0 on success, <0 on failure: -ENOENT if @name is not found below
+ * @r->kn, otherwise whatever kernfs_setattr() returns.
+ */
+int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
+                            umode_t mask)
+{
+       struct iattr iattr = {.ia_valid = ATTR_MODE,};
+       struct kernfs_node *kn, *parent;
+       struct rftype *rfts, *rft;
+       int ret, len;
+
+       rfts = res_common_files;
+       len = ARRAY_SIZE(res_common_files);
+
+       for (rft = rfts; rft < rfts + len; rft++) {
+               if (!strcmp(rft->name, name))
+                       iattr.ia_mode = rft->mode & mask;
+       }
+
+       kn = kernfs_find_and_get_ns(r->kn, name, NULL);
+       if (!kn)
+               return -ENOENT;
+
+       switch (kernfs_type(kn)) {
+       case KERNFS_DIR:
+               parent = kernfs_get_parent(kn);
+               if (parent) {
+                       iattr.ia_mode |= parent->mode;
+                       kernfs_put(parent);
+               }
+               iattr.ia_mode |= S_IFDIR;
+               break;
+       case KERNFS_FILE:
+               iattr.ia_mode |= S_IFREG;
+               break;
+       case KERNFS_LINK:
+               iattr.ia_mode |= S_IFLNK;
+               break;
+       }
+
+       ret = kernfs_setattr(kn, &iattr);
+       kernfs_put(kn);
+       return ret;
+}
+
 static int rdtgroup_mkdir_info_resdir(struct rdt_resource *r, char *name,
                                      unsigned long fflags)
 {
@@ -1596,6 +1799,9 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn)
 
        if (atomic_dec_and_test(&rdtgrp->waitcount) &&
            (rdtgrp->flags & RDT_DELETED)) {
+               if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
+                   rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
+                       rdtgroup_pseudo_lock_remove(rdtgrp);
                kernfs_unbreak_active_protection(kn);
                kernfs_put(rdtgrp->kn);
                kfree(rdtgrp);
@@ -1827,6 +2033,10 @@ static void rmdir_all_sub(void)
                if (rdtgrp == &rdtgroup_default)
                        continue;
 
+               if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
+                   rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
+                       rdtgroup_pseudo_lock_remove(rdtgrp);
+
                /*
                 * Give any CPUs back to the default group. We cannot copy
                 * cpu_online_mask because a CPU might have executed the
@@ -1863,6 +2073,7 @@ static void rdt_kill_sb(struct super_block *sb)
                reset_all_ctrls(r);
        cdp_disable_all();
        rmdir_all_sub();
+       rdt_pseudo_lock_release();
        rdtgroup_default.mode = RDT_MODE_SHAREABLE;
        static_branch_disable_cpuslocked(&rdt_alloc_enable_key);
        static_branch_disable_cpuslocked(&rdt_mon_enable_key);
@@ -2131,11 +2342,15 @@ static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
                        for (i = 0; i < r->num_closid; i++, ctrl++) {
                                if (closid_allocated(i) && i != closid) {
                                        mode = rdtgroup_mode_by_closid(i);
+                                       if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
+                                               break;
                                        used_b |= *ctrl;
                                        if (mode == RDT_MODE_SHAREABLE)
                                                d->new_ctrl |= *ctrl;
                                }
                        }
+                       if (d->plr && d->plr->cbm > 0)
+                               used_b |= d->plr->cbm;
                        unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1);
                        unused_b &= BIT_MASK(r->cache.cbm_len) - 1;
                        d->new_ctrl |= unused_b;
@@ -2185,6 +2400,14 @@ static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
                goto out_unlock;
        }
 
+       if (rtype == RDTMON_GROUP &&
+           (prdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
+            prdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)) {
+               ret = -EINVAL;
+               rdt_last_cmd_puts("pseudo-locking in progress\n");
+               goto out_unlock;
+       }
+
        /* allocate the rdtgroup. */
        rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL);
        if (!rdtgrp) {
@@ -2435,6 +2658,21 @@ static int rdtgroup_rmdir_mon(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
        return 0;
 }
 
+static int rdtgroup_ctrl_remove(struct kernfs_node *kn,
+                               struct rdtgroup *rdtgrp)
+{      /*
+        * Flag @rdtgrp as deleted, unlink it from its rdtgroup list and
+        * remove its kernfs node. Always returns 0.
+        */
+       rdtgrp->flags = RDT_DELETED;    /* plain assignment, not an OR */
+       list_del(&rdtgrp->rdtgroup_list);
+
+       /*
+        * Take one extra hold on @kn before the node is removed. It is
+        * dropped in rdtgroup_kn_unlock(), which also does the
+        * kfree(rdtgrp).
+        */
+       kernfs_get(kn);
+       kernfs_remove(rdtgrp->kn);
+       return 0;
+}
+
 static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
                               cpumask_var_t tmpmask)
 {
@@ -2460,7 +2698,6 @@ static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
        cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
        update_closid_rmid(tmpmask, NULL);
 
-       rdtgrp->flags = RDT_DELETED;
        closid_free(rdtgrp->closid);
        free_rmid(rdtgrp->mon.rmid);
 
@@ -2469,14 +2706,7 @@ static int rdtgroup_rmdir_ctrl(struct kernfs_node *kn, struct rdtgroup *rdtgrp,
         */
        free_all_child_rdtgrp(rdtgrp);
 
-       list_del(&rdtgrp->rdtgroup_list);
-
-       /*
-        * one extra hold on this, will drop when we kfree(rdtgrp)
-        * in rdtgroup_kn_unlock()
-        */
-       kernfs_get(kn);
-       kernfs_remove(rdtgrp->kn);
+       rdtgroup_ctrl_remove(kn, rdtgrp);
 
        return 0;
 }
@@ -2504,13 +2734,19 @@ static int rdtgroup_rmdir(struct kernfs_node *kn)
         * If the rdtgroup is a mon group and parent directory
         * is a valid "mon_groups" directory, remove the mon group.
         */
-       if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn)
-               ret = rdtgroup_rmdir_ctrl(kn, rdtgrp, tmpmask);
-       else if (rdtgrp->type == RDTMON_GROUP &&
-                is_mon_groups(parent_kn, kn->name))
+       if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn) {
+               if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
+                   rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
+                       ret = rdtgroup_ctrl_remove(kn, rdtgrp);
+               } else {
+                       ret = rdtgroup_rmdir_ctrl(kn, rdtgrp, tmpmask);
+               }
+       } else if (rdtgrp->type == RDTMON_GROUP &&
+                is_mon_groups(parent_kn, kn->name)) {
                ret = rdtgroup_rmdir_mon(kn, rdtgrp, tmpmask);
-       else
+       } else {
                ret = -EPERM;
+       }
 
 out:
        rdtgroup_kn_unlock(kn);
@@ -2536,7 +2772,8 @@ static int __init rdtgroup_setup_root(void)
        int ret;
 
        rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops,
-                                     KERNFS_ROOT_CREATE_DEACTIVATED,
+                                     KERNFS_ROOT_CREATE_DEACTIVATED |
+                                     KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK,
                                      &rdtgroup_default);
        if (IS_ERR(rdt_root))
                return PTR_ERR(rdt_root);
@@ -2592,6 +2829,29 @@ int __init rdtgroup_init(void)
        if (ret)
                goto cleanup_mountpoint;
 
+       /*
+        * Adding the resctrl debugfs directory here may not be ideal since
+        * it would let the resctrl debugfs directory appear on the debugfs
+        * filesystem before the resctrl filesystem is mounted.
+        * It may also be ok since that would enable debugging of RDT before
+        * resctrl is mounted.
+        * The reason why the debugfs directory is created here and not in
+        * rdt_mount() is because rdt_mount() takes rdtgroup_mutex and
+        * during the debugfs directory creation also &sb->s_type->i_mutex_key
+        * (the lockdep class of inode->i_rwsem). Other filesystem
+        * interactions (eg. SyS_getdents) have the lock ordering:
+        * &sb->s_type->i_mutex_key --> &mm->mmap_sem
+        * During mmap(), called with &mm->mmap_sem, the rdtgroup_mutex
+        * is taken, thus creating dependency:
+        * &mm->mmap_sem --> rdtgroup_mutex for the latter that can cause
+        * issues considering the other two lock dependencies.
+        * By creating the debugfs directory here we avoid a dependency
+        * that may cause deadlock (even though file operations cannot
+        * occur until the filesystem is mounted, but I do not know how to
+        * tell lockdep that).
+        */
+       debugfs_resctrl = debugfs_create_dir("resctrl", NULL);
+
        return 0;
 
 cleanup_mountpoint:
@@ -2601,3 +2861,11 @@ cleanup_root:
 
        return ret;
 }
+
+void __exit rdtgroup_exit(void)
+{      /*
+        * Exit-time teardown: remove the "resctrl" debugfs directory that
+        * rdtgroup_init() stored in debugfs_resctrl, unregister rdt_fs_type,
+        * remove the "resctrl" sysfs mount point under fs_kobj, and destroy
+        * the kernfs root rdt_root.
+        */
+       debugfs_remove_recursive(debugfs_resctrl);
+       unregister_filesystem(&rdt_fs_type);
+       sysfs_remove_mount_point(fs_kobj, "resctrl");
+       kernfs_destroy_root(rdt_root);
+}