IB: address translation to map IP to IB addresses (GIDs)
[sfrench/cifs-2.6.git] / drivers / infiniband / core / addr.c
1 /*
2  * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
3  * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
4  * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
5  * Copyright (c) 2005 Intel Corporation.  All rights reserved.
6  *
7  * This Software is licensed under one of the following licenses:
8  *
9  * 1) under the terms of the "Common Public License 1.0" a copy of which is
10  *    available from the Open Source Initiative, see
11  *    http://www.opensource.org/licenses/cpl.php.
12  *
13  * 2) under the terms of the "The BSD License" a copy of which is
14  *    available from the Open Source Initiative, see
15  *    http://www.opensource.org/licenses/bsd-license.php.
16  *
17  * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
18  *    copy of which is available from the Open Source Initiative, see
19  *    http://www.opensource.org/licenses/gpl-license.php.
20  *
21  * Licensee has the right to choose one of the above licenses.
22  *
23  * Redistributions of source code must retain the above copyright
24  * notice and one of the license notices.
25  *
26  * Redistributions in binary form must reproduce both the above copyright
27  * notice, one of the license notices in the documentation
28  * and/or other materials provided with the distribution.
29  */
30
31 #include <linux/mutex.h>
32 #include <linux/inetdevice.h>
33 #include <linux/workqueue.h>
34 #include <linux/if_arp.h>
35 #include <net/arp.h>
36 #include <net/neighbour.h>
37 #include <net/route.h>
38 #include <rdma/ib_addr.h>
39
40 MODULE_AUTHOR("Sean Hefty");
41 MODULE_DESCRIPTION("IB Address Translation");
42 MODULE_LICENSE("Dual BSD/GPL");
43
44 struct addr_req {
45         struct list_head list;
46         struct sockaddr src_addr;
47         struct sockaddr dst_addr;
48         struct rdma_dev_addr *addr;
49         void *context;
50         void (*callback)(int status, struct sockaddr *src_addr,
51                          struct rdma_dev_addr *addr, void *context);
52         unsigned long timeout;
53         int status;
54 };
55
static void process_req(void *data);

/* Taken around every walk or modification of req_list in this file. */
static DEFINE_MUTEX(lock);
/* Pending requests; queue_req() inserts them sorted by timeout. */
static LIST_HEAD(req_list);
/* Delayed work item that runs process_req(); armed by set_timeout(). */
static DECLARE_WORK(work, process_req, NULL);
/* Workqueue the work item is queued on; created in addr_init(). */
static struct workqueue_struct *addr_wq;
62
63 static int copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
64                      unsigned char *dst_dev_addr)
65 {
66         switch (dev->type) {
67         case ARPHRD_INFINIBAND:
68                 dev_addr->dev_type = IB_NODE_CA;
69                 break;
70         default:
71                 return -EADDRNOTAVAIL;
72         }
73
74         memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
75         memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
76         if (dst_dev_addr)
77                 memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
78         return 0;
79 }
80
81 int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
82 {
83         struct net_device *dev;
84         u32 ip = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
85         int ret;
86
87         dev = ip_dev_find(ip);
88         if (!dev)
89                 return -EADDRNOTAVAIL;
90
91         ret = copy_addr(dev_addr, dev, NULL);
92         dev_put(dev);
93         return ret;
94 }
95 EXPORT_SYMBOL(rdma_translate_ip);
96
97 static void set_timeout(unsigned long time)
98 {
99         unsigned long delay;
100
101         cancel_delayed_work(&work);
102
103         delay = time - jiffies;
104         if ((long)delay <= 0)
105                 delay = 1;
106
107         queue_delayed_work(addr_wq, &work, delay);
108 }
109
110 static void queue_req(struct addr_req *req)
111 {
112         struct addr_req *temp_req;
113
114         mutex_lock(&lock);
115         list_for_each_entry_reverse(temp_req, &req_list, list) {
116                 if (time_after(req->timeout, temp_req->timeout))
117                         break;
118         }
119
120         list_add(&req->list, &temp_req->list);
121
122         if (req_list.next == &req->list)
123                 set_timeout(req->timeout);
124         mutex_unlock(&lock);
125 }
126
127 static void addr_send_arp(struct sockaddr_in *dst_in)
128 {
129         struct rtable *rt;
130         struct flowi fl;
131         u32 dst_ip = dst_in->sin_addr.s_addr;
132
133         memset(&fl, 0, sizeof fl);
134         fl.nl_u.ip4_u.daddr = dst_ip;
135         if (ip_route_output_key(&rt, &fl))
136                 return;
137
138         arp_send(ARPOP_REQUEST, ETH_P_ARP, rt->rt_gateway, rt->idev->dev,
139                  rt->rt_src, NULL, rt->idev->dev->dev_addr, NULL);
140         ip_rt_put(rt);
141 }
142
143 static int addr_resolve_remote(struct sockaddr_in *src_in,
144                                struct sockaddr_in *dst_in,
145                                struct rdma_dev_addr *addr)
146 {
147         u32 src_ip = src_in->sin_addr.s_addr;
148         u32 dst_ip = dst_in->sin_addr.s_addr;
149         struct flowi fl;
150         struct rtable *rt;
151         struct neighbour *neigh;
152         int ret;
153
154         memset(&fl, 0, sizeof fl);
155         fl.nl_u.ip4_u.daddr = dst_ip;
156         fl.nl_u.ip4_u.saddr = src_ip;
157         ret = ip_route_output_key(&rt, &fl);
158         if (ret)
159                 goto out;
160
161         /* If the device does ARP internally, return 'done' */
162         if (rt->idev->dev->flags & IFF_NOARP) {
163                 copy_addr(addr, rt->idev->dev, NULL);
164                 goto put;
165         }
166
167         neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev);
168         if (!neigh) {
169                 ret = -ENODATA;
170                 goto put;
171         }
172
173         if (!(neigh->nud_state & NUD_VALID)) {
174                 ret = -ENODATA;
175                 goto release;
176         }
177
178         if (!src_ip) {
179                 src_in->sin_family = dst_in->sin_family;
180                 src_in->sin_addr.s_addr = rt->rt_src;
181         }
182
183         ret = copy_addr(addr, neigh->dev, neigh->ha);
184 release:
185         neigh_release(neigh);
186 put:
187         ip_rt_put(rt);
188 out:
189         return ret;
190 }
191
192 static void process_req(void *data)
193 {
194         struct addr_req *req, *temp_req;
195         struct sockaddr_in *src_in, *dst_in;
196         struct list_head done_list;
197
198         INIT_LIST_HEAD(&done_list);
199
200         mutex_lock(&lock);
201         list_for_each_entry_safe(req, temp_req, &req_list, list) {
202                 if (req->status) {
203                         src_in = (struct sockaddr_in *) &req->src_addr;
204                         dst_in = (struct sockaddr_in *) &req->dst_addr;
205                         req->status = addr_resolve_remote(src_in, dst_in,
206                                                           req->addr);
207                 }
208                 if (req->status && time_after(jiffies, req->timeout))
209                         req->status = -ETIMEDOUT;
210                 else if (req->status == -ENODATA)
211                         continue;
212
213                 list_del(&req->list);
214                 list_add_tail(&req->list, &done_list);
215         }
216
217         if (!list_empty(&req_list)) {
218                 req = list_entry(req_list.next, struct addr_req, list);
219                 set_timeout(req->timeout);
220         }
221         mutex_unlock(&lock);
222
223         list_for_each_entry_safe(req, temp_req, &done_list, list) {
224                 list_del(&req->list);
225                 req->callback(req->status, &req->src_addr, req->addr,
226                               req->context);
227                 kfree(req);
228         }
229 }
230
231 static int addr_resolve_local(struct sockaddr_in *src_in,
232                               struct sockaddr_in *dst_in,
233                               struct rdma_dev_addr *addr)
234 {
235         struct net_device *dev;
236         u32 src_ip = src_in->sin_addr.s_addr;
237         u32 dst_ip = dst_in->sin_addr.s_addr;
238         int ret;
239
240         dev = ip_dev_find(dst_ip);
241         if (!dev)
242                 return -EADDRNOTAVAIL;
243
244         if (ZERONET(src_ip)) {
245                 src_in->sin_family = dst_in->sin_family;
246                 src_in->sin_addr.s_addr = dst_ip;
247                 ret = copy_addr(addr, dev, dev->dev_addr);
248         } else if (LOOPBACK(src_ip)) {
249                 ret = rdma_translate_ip((struct sockaddr *)dst_in, addr);
250                 if (!ret)
251                         memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
252         } else {
253                 ret = rdma_translate_ip((struct sockaddr *)src_in, addr);
254                 if (!ret)
255                         memcpy(addr->dst_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
256         }
257
258         dev_put(dev);
259         return ret;
260 }
261
262 int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr,
263                     struct rdma_dev_addr *addr, int timeout_ms,
264                     void (*callback)(int status, struct sockaddr *src_addr,
265                                      struct rdma_dev_addr *addr, void *context),
266                     void *context)
267 {
268         struct sockaddr_in *src_in, *dst_in;
269         struct addr_req *req;
270         int ret = 0;
271
272         req = kmalloc(sizeof *req, GFP_KERNEL);
273         if (!req)
274                 return -ENOMEM;
275         memset(req, 0, sizeof *req);
276
277         if (src_addr)
278                 memcpy(&req->src_addr, src_addr, ip_addr_size(src_addr));
279         memcpy(&req->dst_addr, dst_addr, ip_addr_size(dst_addr));
280         req->addr = addr;
281         req->callback = callback;
282         req->context = context;
283
284         src_in = (struct sockaddr_in *) &req->src_addr;
285         dst_in = (struct sockaddr_in *) &req->dst_addr;
286
287         req->status = addr_resolve_local(src_in, dst_in, addr);
288         if (req->status == -EADDRNOTAVAIL)
289                 req->status = addr_resolve_remote(src_in, dst_in, addr);
290
291         switch (req->status) {
292         case 0:
293                 req->timeout = jiffies;
294                 queue_req(req);
295                 break;
296         case -ENODATA:
297                 req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
298                 queue_req(req);
299                 addr_send_arp(dst_in);
300                 break;
301         default:
302                 ret = req->status;
303                 kfree(req);
304                 break;
305         }
306         return ret;
307 }
308 EXPORT_SYMBOL(rdma_resolve_ip);
309
310 void rdma_addr_cancel(struct rdma_dev_addr *addr)
311 {
312         struct addr_req *req, *temp_req;
313
314         mutex_lock(&lock);
315         list_for_each_entry_safe(req, temp_req, &req_list, list) {
316                 if (req->addr == addr) {
317                         req->status = -ECANCELED;
318                         req->timeout = jiffies;
319                         list_del(&req->list);
320                         list_add(&req->list, &req_list);
321                         set_timeout(req->timeout);
322                         break;
323                 }
324         }
325         mutex_unlock(&lock);
326 }
327 EXPORT_SYMBOL(rdma_addr_cancel);
328
329 static int addr_arp_recv(struct sk_buff *skb, struct net_device *dev,
330                          struct packet_type *pkt, struct net_device *orig_dev)
331 {
332         struct arphdr *arp_hdr;
333
334         arp_hdr = (struct arphdr *) skb->nh.raw;
335
336         if (arp_hdr->ar_op == htons(ARPOP_REQUEST) ||
337             arp_hdr->ar_op == htons(ARPOP_REPLY))
338                 set_timeout(jiffies);
339
340         kfree_skb(skb);
341         return 0;
342 }
343
344 static struct packet_type addr_arp = {
345         .type           = __constant_htons(ETH_P_ARP),
346         .func           = addr_arp_recv,
347         .af_packet_priv = (void*) 1,
348 };
349
350 static int addr_init(void)
351 {
352         addr_wq = create_singlethread_workqueue("ib_addr_wq");
353         if (!addr_wq)
354                 return -ENOMEM;
355
356         dev_add_pack(&addr_arp);
357         return 0;
358 }
359
360 static void addr_cleanup(void)
361 {
362         dev_remove_pack(&addr_arp);
363         destroy_workqueue(addr_wq);
364 }
365
/* Register the module's load and unload entry points. */
module_init(addr_init);
module_exit(addr_cleanup);