Merge tag 'renesas-dt-fixes-for-v4.15' of https://git.kernel.org/pub/scm/linux/kernel...
[sfrench/cifs-2.6.git] / kernel / pid.c
index 020dedbdf066bccbc370cba20be8dc7dbc914629..b13b624e2c4902c67d2ae789c7af8fb2d6269cd8 100644 (file)
 #include <linux/proc_ns.h>
 #include <linux/proc_fs.h>
 #include <linux/sched/task.h>
+#include <linux/idr.h>
 
-#define pid_hashfn(nr, ns)     \
-       hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift)
-static struct hlist_head *pid_hash;
-static unsigned int pidhash_shift = 4;
 struct pid init_struct_pid = INIT_STRUCT_PID;
 
 int pid_max = PID_MAX_DEFAULT;
@@ -53,15 +50,6 @@ int pid_max = PID_MAX_DEFAULT;
 int pid_max_min = RESERVED_PIDS + 1;
 int pid_max_max = PID_MAX_LIMIT;
 
-static inline int mk_pid(struct pid_namespace *pid_ns,
-               struct pidmap *map, int off)
-{
-       return (map - pid_ns->pidmap)*BITS_PER_PAGE + off;
-}
-
-#define find_next_offset(map, off)                                     \
-               find_next_zero_bit((map)->page, BITS_PER_PAGE, off)
-
 /*
  * PID-map pages start out as NULL, they get allocated upon
  * first use and are never deallocated. This way a low pid_max
@@ -70,11 +58,8 @@ static inline int mk_pid(struct pid_namespace *pid_ns,
  */
 struct pid_namespace init_pid_ns = {
        .kref = KREF_INIT(2),
-       .pidmap = {
-               [ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL }
-       },
-       .last_pid = 0,
-       .nr_hashed = PIDNS_HASH_ADDING,
+       .idr = IDR_INIT,
+       .pid_allocated = PIDNS_ADDING,
        .level = 0,
        .child_reaper = &init_task,
        .user_ns = &init_user_ns,
@@ -101,138 +86,6 @@ EXPORT_SYMBOL_GPL(init_pid_ns);
 
 static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
 
-static void free_pidmap(struct upid *upid)
-{
-       int nr = upid->nr;
-       struct pidmap *map = upid->ns->pidmap + nr / BITS_PER_PAGE;
-       int offset = nr & BITS_PER_PAGE_MASK;
-
-       clear_bit(offset, map->page);
-       atomic_inc(&map->nr_free);
-}
-
-/*
- * If we started walking pids at 'base', is 'a' seen before 'b'?
- */
-static int pid_before(int base, int a, int b)
-{
-       /*
-        * This is the same as saying
-        *
-        * (a - base + MAXUINT) % MAXUINT < (b - base + MAXUINT) % MAXUINT
-        * and that mapping orders 'a' and 'b' with respect to 'base'.
-        */
-       return (unsigned)(a - base) < (unsigned)(b - base);
-}
-
-/*
- * We might be racing with someone else trying to set pid_ns->last_pid
- * at the pid allocation time (there's also a sysctl for this, but racing
- * with this one is OK, see comment in kernel/pid_namespace.c about it).
- * We want the winner to have the "later" value, because if the
- * "earlier" value prevails, then a pid may get reused immediately.
- *
- * Since pids rollover, it is not sufficient to just pick the bigger
- * value.  We have to consider where we started counting from.
- *
- * 'base' is the value of pid_ns->last_pid that we observed when
- * we started looking for a pid.
- *
- * 'pid' is the pid that we eventually found.
- */
-static void set_last_pid(struct pid_namespace *pid_ns, int base, int pid)
-{
-       int prev;
-       int last_write = base;
-       do {
-               prev = last_write;
-               last_write = cmpxchg(&pid_ns->last_pid, prev, pid);
-       } while ((prev != last_write) && (pid_before(base, last_write, pid)));
-}
-
-static int alloc_pidmap(struct pid_namespace *pid_ns)
-{
-       int i, offset, max_scan, pid, last = pid_ns->last_pid;
-       struct pidmap *map;
-
-       pid = last + 1;
-       if (pid >= pid_max)
-               pid = RESERVED_PIDS;
-       offset = pid & BITS_PER_PAGE_MASK;
-       map = &pid_ns->pidmap[pid/BITS_PER_PAGE];
-       /*
-        * If last_pid points into the middle of the map->page we
-        * want to scan this bitmap block twice, the second time
-        * we start with offset == 0 (or RESERVED_PIDS).
-        */
-       max_scan = DIV_ROUND_UP(pid_max, BITS_PER_PAGE) - !offset;
-       for (i = 0; i <= max_scan; ++i) {
-               if (unlikely(!map->page)) {
-                       void *page = kzalloc(PAGE_SIZE, GFP_KERNEL);
-                       /*
-                        * Free the page if someone raced with us
-                        * installing it:
-                        */
-                       spin_lock_irq(&pidmap_lock);
-                       if (!map->page) {
-                               map->page = page;
-                               page = NULL;
-                       }
-                       spin_unlock_irq(&pidmap_lock);
-                       kfree(page);
-                       if (unlikely(!map->page))
-                               return -ENOMEM;
-               }
-               if (likely(atomic_read(&map->nr_free))) {
-                       for ( ; ; ) {
-                               if (!test_and_set_bit(offset, map->page)) {
-                                       atomic_dec(&map->nr_free);
-                                       set_last_pid(pid_ns, last, pid);
-                                       return pid;
-                               }
-                               offset = find_next_offset(map, offset);
-                               if (offset >= BITS_PER_PAGE)
-                                       break;
-                               pid = mk_pid(pid_ns, map, offset);
-                               if (pid >= pid_max)
-                                       break;
-                       }
-               }
-               if (map < &pid_ns->pidmap[(pid_max-1)/BITS_PER_PAGE]) {
-                       ++map;
-                       offset = 0;
-               } else {
-                       map = &pid_ns->pidmap[0];
-                       offset = RESERVED_PIDS;
-                       if (unlikely(last == offset))
-                               break;
-               }
-               pid = mk_pid(pid_ns, map, offset);
-       }
-       return -EAGAIN;
-}
-
-int next_pidmap(struct pid_namespace *pid_ns, unsigned int last)
-{
-       int offset;
-       struct pidmap *map, *end;
-
-       if (last >= PID_MAX_LIMIT)
-               return -1;
-
-       offset = (last + 1) & BITS_PER_PAGE_MASK;
-       map = &pid_ns->pidmap[(last + 1)/BITS_PER_PAGE];
-       end = &pid_ns->pidmap[PIDMAP_ENTRIES];
-       for (; map < end; map++, offset = 0) {
-               if (unlikely(!map->page))
-                       continue;
-               offset = find_next_bit((map)->page, BITS_PER_PAGE, offset);
-               if (offset < BITS_PER_PAGE)
-                       return mk_pid(pid_ns, map, offset);
-       }
-       return -1;
-}
-
 void put_pid(struct pid *pid)
 {
        struct pid_namespace *ns;
@@ -265,8 +118,7 @@ void free_pid(struct pid *pid)
        for (i = 0; i <= pid->level; i++) {
                struct upid *upid = pid->numbers + i;
                struct pid_namespace *ns = upid->ns;
-               hlist_del_rcu(&upid->pid_chain);
-               switch(--ns->nr_hashed) {
+               switch (--ns->pid_allocated) {
                case 2:
                case 1:
                        /* When all that is left in the pid namespace
@@ -275,21 +127,20 @@ void free_pid(struct pid *pid)
                         */
                        wake_up_process(ns->child_reaper);
                        break;
-               case PIDNS_HASH_ADDING:
+               case PIDNS_ADDING:
                        /* Handle a fork failure of the first process */
                        WARN_ON(ns->child_reaper);
-                       ns->nr_hashed = 0;
+                       ns->pid_allocated = 0;
                        /* fall through */
                case 0:
                        schedule_work(&ns->proc_work);
                        break;
                }
+
+               idr_remove(&ns->idr, upid->nr);
        }
        spin_unlock_irqrestore(&pidmap_lock, flags);
 
-       for (i = 0; i <= pid->level; i++)
-               free_pidmap(pid->numbers + i);
-
        call_rcu(&pid->rcu, delayed_put_pid);
 }
 
@@ -308,8 +159,29 @@ struct pid *alloc_pid(struct pid_namespace *ns)
 
        tmp = ns;
        pid->level = ns->level;
+
        for (i = ns->level; i >= 0; i--) {
-               nr = alloc_pidmap(tmp);
+               int pid_min = 1;
+
+               idr_preload(GFP_KERNEL);
+               spin_lock_irq(&pidmap_lock);
+
+               /*
+                * init really needs pid 1, but after reaching the maximum
+                * wrap back to RESERVED_PIDS
+                */
+               if (idr_get_cursor(&tmp->idr) > RESERVED_PIDS)
+                       pid_min = RESERVED_PIDS;
+
+               /*
+                * Store a null pointer so find_pid_ns does not find
+                * a partially initialized PID (see below).
+                */
+               nr = idr_alloc_cyclic(&tmp->idr, NULL, pid_min,
+                                     pid_max, GFP_ATOMIC);
+               spin_unlock_irq(&pidmap_lock);
+               idr_preload_end();
+
                if (nr < 0) {
                        retval = nr;
                        goto out_free;
@@ -334,12 +206,12 @@ struct pid *alloc_pid(struct pid_namespace *ns)
 
        upid = pid->numbers + ns->level;
        spin_lock_irq(&pidmap_lock);
-       if (!(ns->nr_hashed & PIDNS_HASH_ADDING))
+       if (!(ns->pid_allocated & PIDNS_ADDING))
                goto out_unlock;
        for ( ; upid >= pid->numbers; --upid) {
-               hlist_add_head_rcu(&upid->pid_chain,
-                               &pid_hash[pid_hashfn(upid->nr, upid->ns)]);
-               upid->ns->nr_hashed++;
+               /* Make the PID visible to find_pid_ns. */
+               idr_replace(&upid->ns->idr, pid, upid->nr);
+               upid->ns->pid_allocated++;
        }
        spin_unlock_irq(&pidmap_lock);
 
@@ -350,8 +222,11 @@ out_unlock:
        put_pid_ns(ns);
 
 out_free:
+       spin_lock_irq(&pidmap_lock);
        while (++i <= ns->level)
-               free_pidmap(pid->numbers + i);
+               idr_remove(&ns->idr, (pid->numbers + i)->nr);
+
+       spin_unlock_irq(&pidmap_lock);
 
        kmem_cache_free(ns->pid_cachep, pid);
        return ERR_PTR(retval);
@@ -360,21 +235,13 @@ out_free:
 void disable_pid_allocation(struct pid_namespace *ns)
 {
        spin_lock_irq(&pidmap_lock);
-       ns->nr_hashed &= ~PIDNS_HASH_ADDING;
+       ns->pid_allocated &= ~PIDNS_ADDING;
        spin_unlock_irq(&pidmap_lock);
 }
 
 struct pid *find_pid_ns(int nr, struct pid_namespace *ns)
 {
-       struct upid *pnr;
-
-       hlist_for_each_entry_rcu(pnr,
-                       &pid_hash[pid_hashfn(nr, ns)], pid_chain)
-               if (pnr->nr == nr && pnr->ns == ns)
-                       return container_of(pnr, struct pid,
-                                       numbers[ns->level]);
-
-       return NULL;
+       return idr_find(&ns->idr, nr);
 }
 EXPORT_SYMBOL_GPL(find_pid_ns);
 
@@ -530,6 +397,7 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
                if (type != PIDTYPE_PID) {
                        if (type == __PIDTYPE_TGID)
                                type = PIDTYPE_PID;
+
                        task = task->group_leader;
                }
                nr = pid_nr_ns(rcu_dereference(task->pids[type].pid), ns);
@@ -553,35 +421,13 @@ EXPORT_SYMBOL_GPL(task_active_pid_ns);
  */
 struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
 {
-       struct pid *pid;
-
-       do {
-               pid = find_pid_ns(nr, ns);
-               if (pid)
-                       break;
-               nr = next_pidmap(ns, nr);
-       } while (nr > 0);
-
-       return pid;
-}
-
-/*
- * The pid hash table is scaled according to the amount of memory in the
- * machine.  From a minimum of 16 slots up to 4096 slots at one gigabyte or
- * more.
- */
-void __init pidhash_init(void)
-{
-       pid_hash = alloc_large_system_hash("PID", sizeof(*pid_hash), 0, 18,
-                                          HASH_EARLY | HASH_SMALL | HASH_ZERO,
-                                          &pidhash_shift, NULL,
-                                          0, 4096);
+       return idr_get_next(&ns->idr, &nr);
 }
 
-void __init pidmap_init(void)
+void __init pid_idr_init(void)
 {
        /* Verify no one has done anything silly: */
-       BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_HASH_ADDING);
+       BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_ADDING);
 
        /* bump default and minimum pid_max based on number of cpus */
        pid_max = min(pid_max_max, max_t(int, pid_max,
@@ -590,10 +436,7 @@ void __init pidmap_init(void)
                                PIDS_PER_CPU_MIN * num_possible_cpus());
        pr_info("pid_max: default: %u minimum: %u\n", pid_max, pid_max_min);
 
-       init_pid_ns.pidmap[0].page = kzalloc(PAGE_SIZE, GFP_KERNEL);
-       /* Reserve PID 0. We never call free_pidmap(0) */
-       set_bit(0, init_pid_ns.pidmap[0].page);
-       atomic_dec(&init_pid_ns.pidmap[0].nr_free);
+       idr_init(&init_pid_ns.idr);
 
        init_pid_ns.pid_cachep = KMEM_CACHE(pid,
                        SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT);