Merge branches 'work.misc' and 'work.dcache' of git://git.kernel.org/pub/scm/linux...
[sfrench/cifs-2.6.git] / net / ipv6 / ip6_fib.c
index 1fb2f3118d60c73433e09f3a71abdbf9a8d92227..d212738e9d100d4e3270f9188466da6b8a3d186c 100644 (file)
@@ -935,20 +935,19 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
 {
        struct fib6_info *leaf = rcu_dereference_protected(fn->leaf,
                                    lockdep_is_held(&rt->fib6_table->tb6_lock));
-       enum fib_event_type event = FIB_EVENT_ENTRY_ADD;
-       struct fib6_info *iter = NULL, *match = NULL;
+       struct fib6_info *iter = NULL;
        struct fib6_info __rcu **ins;
+       struct fib6_info __rcu **fallback_ins = NULL;
        int replace = (info->nlh &&
                       (info->nlh->nlmsg_flags & NLM_F_REPLACE));
-       int append = (info->nlh &&
-                      (info->nlh->nlmsg_flags & NLM_F_APPEND));
        int add = (!info->nlh ||
                   (info->nlh->nlmsg_flags & NLM_F_CREATE));
        int found = 0;
+       bool rt_can_ecmp = rt6_qualify_for_ecmp(rt);
        u16 nlflags = NLM_F_EXCL;
        int err;
 
-       if (append)
+       if (info->nlh && (info->nlh->nlmsg_flags & NLM_F_APPEND))
                nlflags |= NLM_F_APPEND;
 
        ins = &fn->leaf;
@@ -970,8 +969,13 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
 
                        nlflags &= ~NLM_F_EXCL;
                        if (replace) {
-                               found++;
-                               break;
+                               if (rt_can_ecmp == rt6_qualify_for_ecmp(iter)) {
+                                       found++;
+                                       break;
+                               }
+                               if (rt_can_ecmp)
+                                       fallback_ins = fallback_ins ?: ins;
+                               goto next_iter;
                        }
 
                        if (rt6_duplicate_nexthop(iter, rt)) {
@@ -986,51 +990,71 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
                                fib6_metric_set(iter, RTAX_MTU, rt->fib6_pmtu);
                                return -EEXIST;
                        }
-
-                       /* first route that matches */
-                       if (!match)
-                               match = iter;
+                       /* If we have the same destination and the same metric,
+                        * but not the same gateway, then the route we try to
+                        * add is sibling to this route, increment our counter
+                        * of siblings, and later we will add our route to the
+                        * list.
+                        * Only static routes (which don't have flag
+                        * RTF_EXPIRES) are used for ECMPv6.
+                        *
+                        * To avoid long list, we only had siblings if the
+                        * route have a gateway.
+                        */
+                       if (rt_can_ecmp &&
+                           rt6_qualify_for_ecmp(iter))
+                               rt->fib6_nsiblings++;
                }
 
                if (iter->fib6_metric > rt->fib6_metric)
                        break;
 
+next_iter:
                ins = &iter->fib6_next;
        }
 
+       if (fallback_ins && !found) {
+               /* No ECMP-able route found, replace first non-ECMP one */
+               ins = fallback_ins;
+               iter = rcu_dereference_protected(*ins,
+                                   lockdep_is_held(&rt->fib6_table->tb6_lock));
+               found++;
+       }
+
        /* Reset round-robin state, if necessary */
        if (ins == &fn->leaf)
                fn->rr_ptr = NULL;
 
        /* Link this route to others same route. */
-       if (append && match) {
+       if (rt->fib6_nsiblings) {
+               unsigned int fib6_nsiblings;
                struct fib6_info *sibling, *temp_sibling;
 
-               if (rt->fib6_flags & RTF_REJECT) {
-                       NL_SET_ERR_MSG(extack,
-                                      "Can not append a REJECT route");
-                       return -EINVAL;
-               } else if (match->fib6_flags & RTF_REJECT) {
-                       NL_SET_ERR_MSG(extack,
-                                      "Can not append to a REJECT route");
-                       return -EINVAL;
+               /* Find the first route that have the same metric */
+               sibling = leaf;
+               while (sibling) {
+                       if (sibling->fib6_metric == rt->fib6_metric &&
+                           rt6_qualify_for_ecmp(sibling)) {
+                               list_add_tail(&rt->fib6_siblings,
+                                             &sibling->fib6_siblings);
+                               break;
+                       }
+                       sibling = rcu_dereference_protected(sibling->fib6_next,
+                                   lockdep_is_held(&rt->fib6_table->tb6_lock));
                }
-               event = FIB_EVENT_ENTRY_APPEND;
-               rt->fib6_nsiblings = match->fib6_nsiblings;
-               list_add_tail(&rt->fib6_siblings, &match->fib6_siblings);
-               match->fib6_nsiblings++;
-
                /* For each sibling in the list, increment the counter of
                 * siblings. BUG() if counters does not match, list of siblings
                 * is broken!
                 */
+               fib6_nsiblings = 0;
                list_for_each_entry_safe(sibling, temp_sibling,
-                                        &match->fib6_siblings, fib6_siblings) {
+                                        &rt->fib6_siblings, fib6_siblings) {
                        sibling->fib6_nsiblings++;
-                       BUG_ON(sibling->fib6_nsiblings != match->fib6_nsiblings);
+                       BUG_ON(sibling->fib6_nsiblings != rt->fib6_nsiblings);
+                       fib6_nsiblings++;
                }
-
-               rt6_multipath_rebalance(match);
+               BUG_ON(fib6_nsiblings != rt->fib6_nsiblings);
+               rt6_multipath_rebalance(temp_sibling);
        }
 
        /*
@@ -1043,8 +1067,9 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct fib6_info *rt,
 add:
                nlflags |= NLM_F_CREATE;
 
-               err = call_fib6_entry_notifiers(info->nl_net, event, rt,
-                                               extack);
+               err = call_fib6_entry_notifiers(info->nl_net,
+                                               FIB_EVENT_ENTRY_ADD,
+                                               rt, extack);
                if (err)
                        return err;
 
@@ -1062,7 +1087,7 @@ add:
                }
 
        } else {
-               struct fib6_info *tmp;
+               int nsiblings;
 
                if (!found) {
                        if (add)
@@ -1077,57 +1102,48 @@ add:
                if (err)
                        return err;
 
-               /* if route being replaced has siblings, set tmp to
-                * last one, otherwise tmp is current route. this is
-                * used to set fib6_next for new route
-                */
-               if (iter->fib6_nsiblings)
-                       tmp = list_last_entry(&iter->fib6_siblings,
-                                             struct fib6_info,
-                                             fib6_siblings);
-               else
-                       tmp = iter;
-
-               /* insert new route */
                atomic_inc(&rt->fib6_ref);
                rcu_assign_pointer(rt->fib6_node, fn);
-               rt->fib6_next = tmp->fib6_next;
+               rt->fib6_next = iter->fib6_next;
                rcu_assign_pointer(*ins, rt);
-
                if (!info->skip_notify)
                        inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE);
                if (!(fn->fn_flags & RTN_RTINFO)) {
                        info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
                        fn->fn_flags |= RTN_RTINFO;
                }
+               nsiblings = iter->fib6_nsiblings;
+               iter->fib6_node = NULL;
+               fib6_purge_rt(iter, fn, info->nl_net);
+               if (rcu_access_pointer(fn->rr_ptr) == iter)
+                       fn->rr_ptr = NULL;
+               fib6_info_release(iter);
 
-               /* delete old route */
-               rt = iter;
-
-               if (rt->fib6_nsiblings) {
-                       struct fib6_info *tmp;
-
+               if (nsiblings) {
                        /* Replacing an ECMP route, remove all siblings */
-                       list_for_each_entry_safe(iter, tmp, &rt->fib6_siblings,
-                                                fib6_siblings) {
-                               iter->fib6_node = NULL;
-                               fib6_purge_rt(iter, fn, info->nl_net);
-                               if (rcu_access_pointer(fn->rr_ptr) == iter)
-                                       fn->rr_ptr = NULL;
-                               fib6_info_release(iter);
-
-                               rt->fib6_nsiblings--;
-                               info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
+                       ins = &rt->fib6_next;
+                       iter = rcu_dereference_protected(*ins,
+                                   lockdep_is_held(&rt->fib6_table->tb6_lock));
+                       while (iter) {
+                               if (iter->fib6_metric > rt->fib6_metric)
+                                       break;
+                               if (rt6_qualify_for_ecmp(iter)) {
+                                       *ins = iter->fib6_next;
+                                       iter->fib6_node = NULL;
+                                       fib6_purge_rt(iter, fn, info->nl_net);
+                                       if (rcu_access_pointer(fn->rr_ptr) == iter)
+                                               fn->rr_ptr = NULL;
+                                       fib6_info_release(iter);
+                                       nsiblings--;
+                                       info->nl_net->ipv6.rt6_stats->fib_rt_entries--;
+                               } else {
+                                       ins = &iter->fib6_next;
+                               }
+                               iter = rcu_dereference_protected(*ins,
+                                       lockdep_is_held(&rt->fib6_table->tb6_lock));
                        }
+                       WARN_ON(nsiblings != 0);
                }
-
-               WARN_ON(rt->fib6_nsiblings != 0);
-
-               rt->fib6_node = NULL;
-               fib6_purge_rt(rt, fn, info->nl_net);
-               if (rcu_access_pointer(fn->rr_ptr) == rt)
-                       fn->rr_ptr = NULL;
-               fib6_info_release(rt);
        }
 
        return 0;