Merge tag 'pci-v4.9-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/helgaas/pci
[sfrench/cifs-2.6.git] / drivers / pci / host / vmd.c
1 /*
2  * Volume Management Device driver
3  * Copyright (c) 2015, Intel Corporation.
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms and conditions of the GNU General Public License,
7  * version 2, as published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
12  * more details.
13  */
14
15 #include <linux/device.h>
16 #include <linux/interrupt.h>
17 #include <linux/irq.h>
18 #include <linux/kernel.h>
19 #include <linux/module.h>
20 #include <linux/msi.h>
21 #include <linux/pci.h>
22 #include <linux/rculist.h>
23 #include <linux/rcupdate.h>
24
25 #include <asm/irqdomain.h>
26 #include <asm/device.h>
27 #include <asm/msi.h>
28 #include <asm/msidef.h>
29
30 #define VMD_CFGBAR      0
31 #define VMD_MEMBAR1     2
32 #define VMD_MEMBAR2     4
33
/*
 * Serializes updates to the per-vector child IRQ lists, to each child's
 * "enabled" flag, and to the per-vector assignment counts.
 */
static DEFINE_RAW_SPINLOCK(list_lock);
38
39 /**
40  * struct vmd_irq - private data to map driver IRQ to the VMD shared vector
41  * @node:       list item for parent traversal.
42  * @rcu:        RCU callback item for freeing.
43  * @irq:        back pointer to parent.
44  * @enabled:    true if driver enabled IRQ
45  * @virq:       the virtual IRQ value provided to the requesting driver.
46  *
47  * Every MSI/MSI-X IRQ requested for a device in a VMD domain will be mapped to
48  * a VMD IRQ using this structure.
49  */
50 struct vmd_irq {
51         struct list_head        node;
52         struct rcu_head         rcu;
53         struct vmd_irq_list     *irq;
54         bool                    enabled;
55         unsigned int            virq;
56 };
57
58 /**
59  * struct vmd_irq_list - list of driver requested IRQs mapping to a VMD vector
60  * @irq_list:   the list of irq's the VMD one demuxes to.
61  * @count:      number of child IRQs assigned to this vector; used to track
62  *              sharing.
63  */
64 struct vmd_irq_list {
65         struct list_head        irq_list;
66         unsigned int            count;
67 };
68
69 struct vmd_dev {
70         struct pci_dev          *dev;
71
72         spinlock_t              cfg_lock;
73         char __iomem            *cfgbar;
74
75         int msix_count;
76         struct vmd_irq_list     *irqs;
77
78         struct pci_sysdata      sysdata;
79         struct resource         resources[3];
80         struct irq_domain       *irq_domain;
81         struct pci_bus          *bus;
82
83 #ifdef CONFIG_X86_DEV_DMA_OPS
84         struct dma_map_ops      dma_ops;
85         struct dma_domain       dma_domain;
86 #endif
87 };
88
89 static inline struct vmd_dev *vmd_from_bus(struct pci_bus *bus)
90 {
91         return container_of(bus->sysdata, struct vmd_dev, sysdata);
92 }
93
94 static inline unsigned int index_from_irqs(struct vmd_dev *vmd,
95                                            struct vmd_irq_list *irqs)
96 {
97         return irqs - vmd->irqs;
98 }
99
100 /*
101  * Drivers managing a device in a VMD domain allocate their own IRQs as before,
102  * but the MSI entry for the hardware it's driving will be programmed with a
103  * destination ID for the VMD MSI-X table.  The VMD muxes interrupts in its
104  * domain into one of its own, and the VMD driver de-muxes these for the
105  * handlers sharing that VMD IRQ.  The vmd irq_domain provides the operations
106  * and irq_chip to set this up.
107  */
108 static void vmd_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
109 {
110         struct vmd_irq *vmdirq = data->chip_data;
111         struct vmd_irq_list *irq = vmdirq->irq;
112         struct vmd_dev *vmd = irq_data_get_irq_handler_data(data);
113
114         msg->address_hi = MSI_ADDR_BASE_HI;
115         msg->address_lo = MSI_ADDR_BASE_LO |
116                           MSI_ADDR_DEST_ID(index_from_irqs(vmd, irq));
117         msg->data = 0;
118 }
119
120 /*
121  * We rely on MSI_FLAG_USE_DEF_CHIP_OPS to set the IRQ mask/unmask ops.
122  */
123 static void vmd_irq_enable(struct irq_data *data)
124 {
125         struct vmd_irq *vmdirq = data->chip_data;
126         unsigned long flags;
127
128         raw_spin_lock_irqsave(&list_lock, flags);
129         WARN_ON(vmdirq->enabled);
130         list_add_tail_rcu(&vmdirq->node, &vmdirq->irq->irq_list);
131         vmdirq->enabled = true;
132         raw_spin_unlock_irqrestore(&list_lock, flags);
133
134         data->chip->irq_unmask(data);
135 }
136
137 static void vmd_irq_disable(struct irq_data *data)
138 {
139         struct vmd_irq *vmdirq = data->chip_data;
140         unsigned long flags;
141
142         data->chip->irq_mask(data);
143
144         raw_spin_lock_irqsave(&list_lock, flags);
145         if (vmdirq->enabled) {
146                 list_del_rcu(&vmdirq->node);
147                 vmdirq->enabled = false;
148         }
149         raw_spin_unlock_irqrestore(&list_lock, flags);
150 }
151
152 /*
153  * XXX: Stubbed until we develop acceptable way to not create conflicts with
154  * other devices sharing the same vector.
155  */
156 static int vmd_irq_set_affinity(struct irq_data *data,
157                                 const struct cpumask *dest, bool force)
158 {
159         return -EINVAL;
160 }
161
162 static struct irq_chip vmd_msi_controller = {
163         .name                   = "VMD-MSI",
164         .irq_enable             = vmd_irq_enable,
165         .irq_disable            = vmd_irq_disable,
166         .irq_compose_msi_msg    = vmd_compose_msi_msg,
167         .irq_set_affinity       = vmd_irq_set_affinity,
168 };
169
170 static irq_hw_number_t vmd_get_hwirq(struct msi_domain_info *info,
171                                      msi_alloc_info_t *arg)
172 {
173         return 0;
174 }
175
176 /*
177  * XXX: We can be even smarter selecting the best IRQ once we solve the
178  * affinity problem.
179  */
180 static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *desc)
181 {
182         int i, best = 1;
183         unsigned long flags;
184
185         if (!desc->msi_attrib.is_msix || vmd->msix_count == 1)
186                 return &vmd->irqs[0];
187
188         raw_spin_lock_irqsave(&list_lock, flags);
189         for (i = 1; i < vmd->msix_count; i++)
190                 if (vmd->irqs[i].count < vmd->irqs[best].count)
191                         best = i;
192         vmd->irqs[best].count++;
193         raw_spin_unlock_irqrestore(&list_lock, flags);
194
195         return &vmd->irqs[best];
196 }
197
198 static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info,
199                         unsigned int virq, irq_hw_number_t hwirq,
200                         msi_alloc_info_t *arg)
201 {
202         struct msi_desc *desc = arg->desc;
203         struct vmd_dev *vmd = vmd_from_bus(msi_desc_to_pci_dev(desc)->bus);
204         struct vmd_irq *vmdirq = kzalloc(sizeof(*vmdirq), GFP_KERNEL);
205         unsigned int index, vector;
206
207         if (!vmdirq)
208                 return -ENOMEM;
209
210         INIT_LIST_HEAD(&vmdirq->node);
211         vmdirq->irq = vmd_next_irq(vmd, desc);
212         vmdirq->virq = virq;
213         index = index_from_irqs(vmd, vmdirq->irq);
214         vector = pci_irq_vector(vmd->dev, index);
215
216         irq_domain_set_info(domain, virq, vector, info->chip, vmdirq,
217                             handle_untracked_irq, vmd, NULL);
218         return 0;
219 }
220
221 static void vmd_msi_free(struct irq_domain *domain,
222                         struct msi_domain_info *info, unsigned int virq)
223 {
224         struct vmd_irq *vmdirq = irq_get_chip_data(virq);
225         unsigned long flags;
226
227         synchronize_rcu();
228
229         /* XXX: Potential optimization to rebalance */
230         raw_spin_lock_irqsave(&list_lock, flags);
231         vmdirq->irq->count--;
232         raw_spin_unlock_irqrestore(&list_lock, flags);
233
234         kfree_rcu(vmdirq, rcu);
235 }
236
237 static int vmd_msi_prepare(struct irq_domain *domain, struct device *dev,
238                            int nvec, msi_alloc_info_t *arg)
239 {
240         struct pci_dev *pdev = to_pci_dev(dev);
241         struct vmd_dev *vmd = vmd_from_bus(pdev->bus);
242
243         if (nvec > vmd->msix_count)
244                 return vmd->msix_count;
245
246         memset(arg, 0, sizeof(*arg));
247         return 0;
248 }
249
250 static void vmd_set_desc(msi_alloc_info_t *arg, struct msi_desc *desc)
251 {
252         arg->desc = desc;
253 }
254
255 static struct msi_domain_ops vmd_msi_domain_ops = {
256         .get_hwirq      = vmd_get_hwirq,
257         .msi_init       = vmd_msi_init,
258         .msi_free       = vmd_msi_free,
259         .msi_prepare    = vmd_msi_prepare,
260         .set_desc       = vmd_set_desc,
261 };
262
263 static struct msi_domain_info vmd_msi_domain_info = {
264         .flags          = MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
265                           MSI_FLAG_PCI_MSIX,
266         .ops            = &vmd_msi_domain_ops,
267         .chip           = &vmd_msi_controller,
268 };
269
270 #ifdef CONFIG_X86_DEV_DMA_OPS
271 /*
272  * VMD replaces the requester ID with its own.  DMA mappings for devices in a
273  * VMD domain need to be mapped for the VMD, not the device requiring
274  * the mapping.
275  */
276 static struct device *to_vmd_dev(struct device *dev)
277 {
278         struct pci_dev *pdev = to_pci_dev(dev);
279         struct vmd_dev *vmd = vmd_from_bus(pdev->bus);
280
281         return &vmd->dev->dev;
282 }
283
/* DMA operations of the VMD endpoint that @dev sits behind. */
static struct dma_map_ops *vmd_dma_ops(struct device *dev)
{
        struct device *parent = to_vmd_dev(dev);

        return get_dma_ops(parent);
}
288
289 static void *vmd_alloc(struct device *dev, size_t size, dma_addr_t *addr,
290                        gfp_t flag, unsigned long attrs)
291 {
292         return vmd_dma_ops(dev)->alloc(to_vmd_dev(dev), size, addr, flag,
293                                        attrs);
294 }
295
296 static void vmd_free(struct device *dev, size_t size, void *vaddr,
297                      dma_addr_t addr, unsigned long attrs)
298 {
299         return vmd_dma_ops(dev)->free(to_vmd_dev(dev), size, vaddr, addr,
300                                       attrs);
301 }
302
303 static int vmd_mmap(struct device *dev, struct vm_area_struct *vma,
304                     void *cpu_addr, dma_addr_t addr, size_t size,
305                     unsigned long attrs)
306 {
307         return vmd_dma_ops(dev)->mmap(to_vmd_dev(dev), vma, cpu_addr, addr,
308                                       size, attrs);
309 }
310
311 static int vmd_get_sgtable(struct device *dev, struct sg_table *sgt,
312                            void *cpu_addr, dma_addr_t addr, size_t size,
313                            unsigned long attrs)
314 {
315         return vmd_dma_ops(dev)->get_sgtable(to_vmd_dev(dev), sgt, cpu_addr,
316                                              addr, size, attrs);
317 }
318
319 static dma_addr_t vmd_map_page(struct device *dev, struct page *page,
320                                unsigned long offset, size_t size,
321                                enum dma_data_direction dir,
322                                unsigned long attrs)
323 {
324         return vmd_dma_ops(dev)->map_page(to_vmd_dev(dev), page, offset, size,
325                                           dir, attrs);
326 }
327
328 static void vmd_unmap_page(struct device *dev, dma_addr_t addr, size_t size,
329                            enum dma_data_direction dir, unsigned long attrs)
330 {
331         vmd_dma_ops(dev)->unmap_page(to_vmd_dev(dev), addr, size, dir, attrs);
332 }
333
334 static int vmd_map_sg(struct device *dev, struct scatterlist *sg, int nents,
335                       enum dma_data_direction dir, unsigned long attrs)
336 {
337         return vmd_dma_ops(dev)->map_sg(to_vmd_dev(dev), sg, nents, dir, attrs);
338 }
339
340 static void vmd_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
341                          enum dma_data_direction dir, unsigned long attrs)
342 {
343         vmd_dma_ops(dev)->unmap_sg(to_vmd_dev(dev), sg, nents, dir, attrs);
344 }
345
346 static void vmd_sync_single_for_cpu(struct device *dev, dma_addr_t addr,
347                                     size_t size, enum dma_data_direction dir)
348 {
349         vmd_dma_ops(dev)->sync_single_for_cpu(to_vmd_dev(dev), addr, size, dir);
350 }
351
352 static void vmd_sync_single_for_device(struct device *dev, dma_addr_t addr,
353                                        size_t size, enum dma_data_direction dir)
354 {
355         vmd_dma_ops(dev)->sync_single_for_device(to_vmd_dev(dev), addr, size,
356                                                  dir);
357 }
358
359 static void vmd_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
360                                 int nents, enum dma_data_direction dir)
361 {
362         vmd_dma_ops(dev)->sync_sg_for_cpu(to_vmd_dev(dev), sg, nents, dir);
363 }
364
365 static void vmd_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
366                                    int nents, enum dma_data_direction dir)
367 {
368         vmd_dma_ops(dev)->sync_sg_for_device(to_vmd_dev(dev), sg, nents, dir);
369 }
370
371 static int vmd_mapping_error(struct device *dev, dma_addr_t addr)
372 {
373         return vmd_dma_ops(dev)->mapping_error(to_vmd_dev(dev), addr);
374 }
375
376 static int vmd_dma_supported(struct device *dev, u64 mask)
377 {
378         return vmd_dma_ops(dev)->dma_supported(to_vmd_dev(dev), mask);
379 }
380
381 #ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
382 static u64 vmd_get_required_mask(struct device *dev)
383 {
384         return vmd_dma_ops(dev)->get_required_mask(to_vmd_dev(dev));
385 }
386 #endif
387
388 static void vmd_teardown_dma_ops(struct vmd_dev *vmd)
389 {
390         struct dma_domain *domain = &vmd->dma_domain;
391
392         if (get_dma_ops(&vmd->dev->dev))
393                 del_dma_domain(domain);
394 }
395
396 #define ASSIGN_VMD_DMA_OPS(source, dest, fn)    \
397         do {                                    \
398                 if (source->fn)                 \
399                         dest->fn = vmd_##fn;    \
400         } while (0)
401
402 static void vmd_setup_dma_ops(struct vmd_dev *vmd)
403 {
404         const struct dma_map_ops *source = get_dma_ops(&vmd->dev->dev);
405         struct dma_map_ops *dest = &vmd->dma_ops;
406         struct dma_domain *domain = &vmd->dma_domain;
407
408         domain->domain_nr = vmd->sysdata.domain;
409         domain->dma_ops = dest;
410
411         if (!source)
412                 return;
413         ASSIGN_VMD_DMA_OPS(source, dest, alloc);
414         ASSIGN_VMD_DMA_OPS(source, dest, free);
415         ASSIGN_VMD_DMA_OPS(source, dest, mmap);
416         ASSIGN_VMD_DMA_OPS(source, dest, get_sgtable);
417         ASSIGN_VMD_DMA_OPS(source, dest, map_page);
418         ASSIGN_VMD_DMA_OPS(source, dest, unmap_page);
419         ASSIGN_VMD_DMA_OPS(source, dest, map_sg);
420         ASSIGN_VMD_DMA_OPS(source, dest, unmap_sg);
421         ASSIGN_VMD_DMA_OPS(source, dest, sync_single_for_cpu);
422         ASSIGN_VMD_DMA_OPS(source, dest, sync_single_for_device);
423         ASSIGN_VMD_DMA_OPS(source, dest, sync_sg_for_cpu);
424         ASSIGN_VMD_DMA_OPS(source, dest, sync_sg_for_device);
425         ASSIGN_VMD_DMA_OPS(source, dest, mapping_error);
426         ASSIGN_VMD_DMA_OPS(source, dest, dma_supported);
427 #ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK
428         ASSIGN_VMD_DMA_OPS(source, dest, get_required_mask);
429 #endif
430         add_dma_domain(domain);
431 }
432 #undef ASSIGN_VMD_DMA_OPS
433 #else
/* Without CONFIG_X86_DEV_DMA_OPS there is no DMA domain to tear down. */
static void vmd_teardown_dma_ops(struct vmd_dev *vmd)
{
}
/* Without CONFIG_X86_DEV_DMA_OPS there is no DMA domain to set up. */
static void vmd_setup_dma_ops(struct vmd_dev *vmd)
{
}
436 #endif
437
438 static char __iomem *vmd_cfg_addr(struct vmd_dev *vmd, struct pci_bus *bus,
439                                   unsigned int devfn, int reg, int len)
440 {
441         char __iomem *addr = vmd->cfgbar +
442                              (bus->number << 20) + (devfn << 12) + reg;
443
444         if ((addr - vmd->cfgbar) + len >=
445             resource_size(&vmd->dev->resource[VMD_CFGBAR]))
446                 return NULL;
447
448         return addr;
449 }
450
451 /*
452  * CPU may deadlock if config space is not serialized on some versions of this
453  * hardware, so all config space access is done under a spinlock.
454  */
455 static int vmd_pci_read(struct pci_bus *bus, unsigned int devfn, int reg,
456                         int len, u32 *value)
457 {
458         struct vmd_dev *vmd = vmd_from_bus(bus);
459         char __iomem *addr = vmd_cfg_addr(vmd, bus, devfn, reg, len);
460         unsigned long flags;
461         int ret = 0;
462
463         if (!addr)
464                 return -EFAULT;
465
466         spin_lock_irqsave(&vmd->cfg_lock, flags);
467         switch (len) {
468         case 1:
469                 *value = readb(addr);
470                 break;
471         case 2:
472                 *value = readw(addr);
473                 break;
474         case 4:
475                 *value = readl(addr);
476                 break;
477         default:
478                 ret = -EINVAL;
479                 break;
480         }
481         spin_unlock_irqrestore(&vmd->cfg_lock, flags);
482         return ret;
483 }
484
485 /*
486  * VMD h/w converts non-posted config writes to posted memory writes. The
487  * read-back in this function forces the completion so it returns only after
488  * the config space was written, as expected.
489  */
490 static int vmd_pci_write(struct pci_bus *bus, unsigned int devfn, int reg,
491                          int len, u32 value)
492 {
493         struct vmd_dev *vmd = vmd_from_bus(bus);
494         char __iomem *addr = vmd_cfg_addr(vmd, bus, devfn, reg, len);
495         unsigned long flags;
496         int ret = 0;
497
498         if (!addr)
499                 return -EFAULT;
500
501         spin_lock_irqsave(&vmd->cfg_lock, flags);
502         switch (len) {
503         case 1:
504                 writeb(value, addr);
505                 readb(addr);
506                 break;
507         case 2:
508                 writew(value, addr);
509                 readw(addr);
510                 break;
511         case 4:
512                 writel(value, addr);
513                 readl(addr);
514                 break;
515         default:
516                 ret = -EINVAL;
517                 break;
518         }
519         spin_unlock_irqrestore(&vmd->cfg_lock, flags);
520         return ret;
521 }
522
523 static struct pci_ops vmd_ops = {
524         .read           = vmd_pci_read,
525         .write          = vmd_pci_write,
526 };
527
528 static void vmd_attach_resources(struct vmd_dev *vmd)
529 {
530         vmd->dev->resource[VMD_MEMBAR1].child = &vmd->resources[1];
531         vmd->dev->resource[VMD_MEMBAR2].child = &vmd->resources[2];
532 }
533
534 static void vmd_detach_resources(struct vmd_dev *vmd)
535 {
536         vmd->dev->resource[VMD_MEMBAR1].child = NULL;
537         vmd->dev->resource[VMD_MEMBAR2].child = NULL;
538 }
539
540 /*
541  * VMD domains start at 0x1000 to not clash with ACPI _SEG domains.
542  */
543 static int vmd_find_free_domain(void)
544 {
545         int domain = 0xffff;
546         struct pci_bus *bus = NULL;
547
548         while ((bus = pci_find_next_bus(bus)) != NULL)
549                 domain = max_t(int, domain, pci_domain_nr(bus));
550         return domain + 1;
551 }
552
553 static int vmd_enable_domain(struct vmd_dev *vmd)
554 {
555         struct pci_sysdata *sd = &vmd->sysdata;
556         struct resource *res;
557         u32 upper_bits;
558         unsigned long flags;
559         LIST_HEAD(resources);
560
561         res = &vmd->dev->resource[VMD_CFGBAR];
562         vmd->resources[0] = (struct resource) {
563                 .name  = "VMD CFGBAR",
564                 .start = 0,
565                 .end   = (resource_size(res) >> 20) - 1,
566                 .flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED,
567         };
568
569         /*
570          * If the window is below 4GB, clear IORESOURCE_MEM_64 so we can
571          * put 32-bit resources in the window.
572          *
573          * There's no hardware reason why a 64-bit window *couldn't*
574          * contain a 32-bit resource, but pbus_size_mem() computes the
575          * bridge window size assuming a 64-bit window will contain no
576          * 32-bit resources.  __pci_assign_resource() enforces that
577          * artificial restriction to make sure everything will fit.
578          *
579          * The only way we could use a 64-bit non-prefechable MEMBAR is
580          * if its address is <4GB so that we can convert it to a 32-bit
581          * resource.  To be visible to the host OS, all VMD endpoints must
582          * be initially configured by platform BIOS, which includes setting
583          * up these resources.  We can assume the device is configured
584          * according to the platform needs.
585          */
586         res = &vmd->dev->resource[VMD_MEMBAR1];
587         upper_bits = upper_32_bits(res->end);
588         flags = res->flags & ~IORESOURCE_SIZEALIGN;
589         if (!upper_bits)
590                 flags &= ~IORESOURCE_MEM_64;
591         vmd->resources[1] = (struct resource) {
592                 .name  = "VMD MEMBAR1",
593                 .start = res->start,
594                 .end   = res->end,
595                 .flags = flags,
596                 .parent = res,
597         };
598
599         res = &vmd->dev->resource[VMD_MEMBAR2];
600         upper_bits = upper_32_bits(res->end);
601         flags = res->flags & ~IORESOURCE_SIZEALIGN;
602         if (!upper_bits)
603                 flags &= ~IORESOURCE_MEM_64;
604         vmd->resources[2] = (struct resource) {
605                 .name  = "VMD MEMBAR2",
606                 .start = res->start + 0x2000,
607                 .end   = res->end,
608                 .flags = flags,
609                 .parent = res,
610         };
611
612         sd->vmd_domain = true;
613         sd->domain = vmd_find_free_domain();
614         if (sd->domain < 0)
615                 return sd->domain;
616
617         sd->node = pcibus_to_node(vmd->dev->bus);
618
619         vmd->irq_domain = pci_msi_create_irq_domain(NULL, &vmd_msi_domain_info,
620                                                     x86_vector_domain);
621         if (!vmd->irq_domain)
622                 return -ENODEV;
623
624         pci_add_resource(&resources, &vmd->resources[0]);
625         pci_add_resource(&resources, &vmd->resources[1]);
626         pci_add_resource(&resources, &vmd->resources[2]);
627         vmd->bus = pci_create_root_bus(&vmd->dev->dev, 0, &vmd_ops, sd,
628                                        &resources);
629         if (!vmd->bus) {
630                 pci_free_resource_list(&resources);
631                 irq_domain_remove(vmd->irq_domain);
632                 return -ENODEV;
633         }
634
635         vmd_attach_resources(vmd);
636         vmd_setup_dma_ops(vmd);
637         dev_set_msi_domain(&vmd->bus->dev, vmd->irq_domain);
638         pci_rescan_bus(vmd->bus);
639
640         WARN(sysfs_create_link(&vmd->dev->dev.kobj, &vmd->bus->dev.kobj,
641                                "domain"), "Can't create symlink to domain\n");
642         return 0;
643 }
644
645 static irqreturn_t vmd_irq(int irq, void *data)
646 {
647         struct vmd_irq_list *irqs = data;
648         struct vmd_irq *vmdirq;
649
650         rcu_read_lock();
651         list_for_each_entry_rcu(vmdirq, &irqs->irq_list, node)
652                 generic_handle_irq(vmdirq->virq);
653         rcu_read_unlock();
654
655         return IRQ_HANDLED;
656 }
657
658 static int vmd_probe(struct pci_dev *dev, const struct pci_device_id *id)
659 {
660         struct vmd_dev *vmd;
661         int i, err;
662
663         if (resource_size(&dev->resource[VMD_CFGBAR]) < (1 << 20))
664                 return -ENOMEM;
665
666         vmd = devm_kzalloc(&dev->dev, sizeof(*vmd), GFP_KERNEL);
667         if (!vmd)
668                 return -ENOMEM;
669
670         vmd->dev = dev;
671         err = pcim_enable_device(dev);
672         if (err < 0)
673                 return err;
674
675         vmd->cfgbar = pcim_iomap(dev, VMD_CFGBAR, 0);
676         if (!vmd->cfgbar)
677                 return -ENOMEM;
678
679         pci_set_master(dev);
680         if (dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(64)) &&
681             dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(32)))
682                 return -ENODEV;
683
684         vmd->msix_count = pci_msix_vec_count(dev);
685         if (vmd->msix_count < 0)
686                 return -ENODEV;
687
688         vmd->msix_count = pci_alloc_irq_vectors(dev, 1, vmd->msix_count,
689                                         PCI_IRQ_MSIX | PCI_IRQ_AFFINITY);
690         if (vmd->msix_count < 0)
691                 return vmd->msix_count;
692
693         vmd->irqs = devm_kcalloc(&dev->dev, vmd->msix_count, sizeof(*vmd->irqs),
694                                  GFP_KERNEL);
695         if (!vmd->irqs)
696                 return -ENOMEM;
697
698         for (i = 0; i < vmd->msix_count; i++) {
699                 INIT_LIST_HEAD(&vmd->irqs[i].irq_list);
700                 err = devm_request_irq(&dev->dev, pci_irq_vector(dev, i),
701                                        vmd_irq, 0, "vmd", &vmd->irqs[i]);
702                 if (err)
703                         return err;
704         }
705
706         spin_lock_init(&vmd->cfg_lock);
707         pci_set_drvdata(dev, vmd);
708         err = vmd_enable_domain(vmd);
709         if (err)
710                 return err;
711
712         dev_info(&vmd->dev->dev, "Bound to PCI domain %04x\n",
713                  vmd->sysdata.domain);
714         return 0;
715 }
716
717 static void vmd_remove(struct pci_dev *dev)
718 {
719         struct vmd_dev *vmd = pci_get_drvdata(dev);
720
721         vmd_detach_resources(vmd);
722         pci_set_drvdata(dev, NULL);
723         sysfs_remove_link(&vmd->dev->dev.kobj, "domain");
724         pci_stop_root_bus(vmd->bus);
725         pci_remove_root_bus(vmd->bus);
726         vmd_teardown_dma_ops(vmd);
727         irq_domain_remove(vmd->irq_domain);
728 }
729
/*
 * SIMPLE_DEV_PM_OPS() only references its callbacks when CONFIG_PM_SLEEP is
 * enabled, so they are guarded by that symbol rather than CONFIG_PM.  This
 * avoids defined-but-unused warnings when CONFIG_PM is set without
 * CONFIG_PM_SLEEP.
 */
#ifdef CONFIG_PM_SLEEP
/* System sleep: save the VMD endpoint's PCI state. */
static int vmd_suspend(struct device *dev)
{
        struct pci_dev *pdev = to_pci_dev(dev);

        pci_save_state(pdev);
        return 0;
}

/* System resume: restore the PCI state saved by vmd_suspend(). */
static int vmd_resume(struct device *dev)
{
        struct pci_dev *pdev = to_pci_dev(dev);

        pci_restore_state(pdev);
        return 0;
}
#endif
static SIMPLE_DEV_PM_OPS(vmd_dev_pm_ops, vmd_suspend, vmd_resume);
748
749 static const struct pci_device_id vmd_ids[] = {
750         {PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x201d),},
751         {0,}
752 };
753 MODULE_DEVICE_TABLE(pci, vmd_ids);
754
755 static struct pci_driver vmd_drv = {
756         .name           = "vmd",
757         .id_table       = vmd_ids,
758         .probe          = vmd_probe,
759         .remove         = vmd_remove,
760         .driver         = {
761                 .pm     = &vmd_dev_pm_ops,
762         },
763 };
764 module_pci_driver(vmd_drv);
765
766 MODULE_AUTHOR("Intel Corporation");
767 MODULE_LICENSE("GPL v2");
768 MODULE_VERSION("0.6");