Merge tag 'hyperv-next-signed' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux
author: Linus Torvalds <torvalds@linux-foundation.org>
Tue, 24 Sep 2019 19:36:31 +0000 (12:36 -0700)
committer: Linus Torvalds <torvalds@linux-foundation.org>
Tue, 24 Sep 2019 19:36:31 +0000 (12:36 -0700)
Pull Hyper-V updates from Sasha Levin:

 - first round of vmbus hibernation support (Dexuan Cui)

 - remove dependencies on PAGE_SIZE (Maya Nakamura)

 - move the hyper-v tools/ code into the tools build system (Andy
   Shevchenko)

 - hyper-v balloon cleanups (Dexuan Cui)

* tag 'hyperv-next-signed' of git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux:
  Drivers: hv: vmbus: Resume after fixing up old primary channels
  Drivers: hv: vmbus: Suspend after cleaning up hv_sock and sub channels
  Drivers: hv: vmbus: Clean up hv_sock channels by force upon suspend
  Drivers: hv: vmbus: Suspend/resume the vmbus itself for hibernation
  Drivers: hv: vmbus: Ignore the offers when resuming from hibernation
  Drivers: hv: vmbus: Implement suspend/resume for VSC drivers for hibernation
  Drivers: hv: vmbus: Add a helper function is_sub_channel()
  Drivers: hv: vmbus: Suspend/resume the synic for hibernation
  Drivers: hv: vmbus: Break out synic enable and disable operations
  HID: hv: Remove dependencies on PAGE_SIZE for ring buffer
  Tools: hv: move to tools buildsystem
  hv_balloon: Reorganize the probe function
  hv_balloon: Use a static page for the balloon_up send buffer

drivers/hid/hid-hyperv.c
drivers/hv/channel_mgmt.c
drivers/hv/connection.c
drivers/hv/hv.c
drivers/hv/hv_balloon.c
drivers/hv/hyperv_vmbus.h
drivers/hv/vmbus_drv.c
include/linux/hyperv.h
tools/hv/Build [new file with mode: 0644]
tools/hv/Makefile

diff --git a/drivers/hid/hid-hyperv.c b/drivers/hid/hid-hyperv.c
index 7795831d37c21f72b107bd7d135c5f04f8c2d107..cc5b09b87ab0dc70b844a40cd9bffe070e078c84 100644
@@ -104,8 +104,8 @@ struct synthhid_input_report {
 
 #pragma pack(pop)
 
-#define INPUTVSC_SEND_RING_BUFFER_SIZE         (10*PAGE_SIZE)
-#define INPUTVSC_RECV_RING_BUFFER_SIZE         (10*PAGE_SIZE)
+#define INPUTVSC_SEND_RING_BUFFER_SIZE         (40 * 1024)
+#define INPUTVSC_RECV_RING_BUFFER_SIZE         (40 * 1024)
 
 
 enum pipe_prot_msg_type {
diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c
index addcef50df7ace897120a23c5b4236c360a886d3..8eb167540b4f66e658e7977858635f6dc8139700 100644
@@ -407,7 +407,15 @@ void hv_process_channel_removal(struct vmbus_channel *channel)
                cpumask_clear_cpu(channel->target_cpu,
                                  &primary_channel->alloced_cpus_in_node);
 
-       vmbus_release_relid(channel->offermsg.child_relid);
+       /*
+        * Upon suspend, an in-use hv_sock channel is marked as "rescinded" and
+        * the relid is invalidated; after hibernation, when the user-space app
+        * destroys the channel, the relid is INVALID_RELID, and in this case
+        * it's unnecessary and unsafe to release the old relid, since the same
+        * relid can refer to a completely different channel now.
+        */
+       if (channel->offermsg.child_relid != INVALID_RELID)
+               vmbus_release_relid(channel->offermsg.child_relid);
 
        free_channel(channel);
 }
@@ -545,6 +553,10 @@ static void vmbus_process_offer(struct vmbus_channel *newchannel)
 
        mutex_lock(&vmbus_connection.channel_mutex);
 
+       /* Remember the channels that should be cleaned up upon suspend. */
+       if (is_hvsock_channel(newchannel) || is_sub_channel(newchannel))
+               atomic_inc(&vmbus_connection.nr_chan_close_on_suspend);
+
        /*
         * Now that we have acquired the channel_mutex,
         * we can release the potentially racing rescind thread.
@@ -847,6 +859,67 @@ void vmbus_initiate_unload(bool crash)
                vmbus_wait_for_unload();
 }
 
+static void check_ready_for_resume_event(void)
+{
+       /*
+        * If all the old primary channels have been fixed up, then it's safe
+        * to resume.
+        */
+       if (atomic_dec_and_test(&vmbus_connection.nr_chan_fixup_on_resume))
+               complete(&vmbus_connection.ready_for_resume_event);
+}
+
+static void vmbus_setup_channel_state(struct vmbus_channel *channel,
+                                     struct vmbus_channel_offer_channel *offer)
+{
+       /*
+        * Setup state for signalling the host.
+        */
+       channel->sig_event = VMBUS_EVENT_CONNECTION_ID;
+
+       if (vmbus_proto_version != VERSION_WS2008) {
+               channel->is_dedicated_interrupt =
+                               (offer->is_dedicated_interrupt != 0);
+               channel->sig_event = offer->connection_id;
+       }
+
+       memcpy(&channel->offermsg, offer,
+              sizeof(struct vmbus_channel_offer_channel));
+       channel->monitor_grp = (u8)offer->monitorid / 32;
+       channel->monitor_bit = (u8)offer->monitorid % 32;
+}
+
+/*
+ * find_primary_channel_by_offer - Get the channel object given the new offer.
+ * This is only used in the resume path of hibernation.
+ */
+static struct vmbus_channel *
+find_primary_channel_by_offer(const struct vmbus_channel_offer_channel *offer)
+{
+       struct vmbus_channel *channel = NULL, *iter;
+       const guid_t *inst1, *inst2;
+
+       /* Ignore sub-channel offers. */
+       if (offer->offer.sub_channel_index != 0)
+               return NULL;
+
+       mutex_lock(&vmbus_connection.channel_mutex);
+
+       list_for_each_entry(iter, &vmbus_connection.chn_list, listentry) {
+               inst1 = &iter->offermsg.offer.if_instance;
+               inst2 = &offer->offer.if_instance;
+
+               if (guid_equal(inst1, inst2)) {
+                       channel = iter;
+                       break;
+               }
+       }
+
+       mutex_unlock(&vmbus_connection.channel_mutex);
+
+       return channel;
+}
+
 /*
  * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
  *
@@ -854,12 +927,58 @@ void vmbus_initiate_unload(bool crash)
 static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
 {
        struct vmbus_channel_offer_channel *offer;
-       struct vmbus_channel *newchannel;
+       struct vmbus_channel *oldchannel, *newchannel;
+       size_t offer_sz;
 
        offer = (struct vmbus_channel_offer_channel *)hdr;
 
        trace_vmbus_onoffer(offer);
 
+       oldchannel = find_primary_channel_by_offer(offer);
+
+       if (oldchannel != NULL) {
+               atomic_dec(&vmbus_connection.offer_in_progress);
+
+               /*
+                * We're resuming from hibernation: all the sub-channel and
+                * hv_sock channels we had before the hibernation should have
+                * been cleaned up, and now we must be seeing a re-offered
+                * primary channel that we had before the hibernation.
+                */
+
+               WARN_ON(oldchannel->offermsg.child_relid != INVALID_RELID);
+               /* Fix up the relid. */
+               oldchannel->offermsg.child_relid = offer->child_relid;
+
+               offer_sz = sizeof(*offer);
+               if (memcmp(offer, &oldchannel->offermsg, offer_sz) == 0) {
+                       check_ready_for_resume_event();
+                       return;
+               }
+
+               /*
+                * This is not an error, since the host can also change the
+                * other field(s) of the offer, e.g. on WS RS5 (Build 17763),
+                * the offer->connection_id of the Mellanox VF vmbus device
+                * can change when the host reoffers the device upon resume.
+                */
+               pr_debug("vmbus offer changed: relid=%d\n",
+                        offer->child_relid);
+
+               print_hex_dump_debug("Old vmbus offer: ", DUMP_PREFIX_OFFSET,
+                                    16, 4, &oldchannel->offermsg, offer_sz,
+                                    false);
+               print_hex_dump_debug("New vmbus offer: ", DUMP_PREFIX_OFFSET,
+                                    16, 4, offer, offer_sz, false);
+
+               /* Fix up the old channel. */
+               vmbus_setup_channel_state(oldchannel, offer);
+
+               check_ready_for_resume_event();
+
+               return;
+       }
+
        /* Allocate the channel object and save this offer. */
        newchannel = alloc_channel();
        if (!newchannel) {
@@ -869,25 +988,21 @@ static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
                return;
        }
 
-       /*
-        * Setup state for signalling the host.
-        */
-       newchannel->sig_event = VMBUS_EVENT_CONNECTION_ID;
-
-       if (vmbus_proto_version != VERSION_WS2008) {
-               newchannel->is_dedicated_interrupt =
-                               (offer->is_dedicated_interrupt != 0);
-               newchannel->sig_event = offer->connection_id;
-       }
-
-       memcpy(&newchannel->offermsg, offer,
-              sizeof(struct vmbus_channel_offer_channel));
-       newchannel->monitor_grp = (u8)offer->monitorid / 32;
-       newchannel->monitor_bit = (u8)offer->monitorid % 32;
+       vmbus_setup_channel_state(newchannel, offer);
 
        vmbus_process_offer(newchannel);
 }
 
+static void check_ready_for_suspend_event(void)
+{
+       /*
+        * If all the sub-channels or hv_sock channels have been cleaned up,
+        * then it's safe to suspend.
+        */
+       if (atomic_dec_and_test(&vmbus_connection.nr_chan_close_on_suspend))
+               complete(&vmbus_connection.ready_for_suspend_event);
+}
+
 /*
  * vmbus_onoffer_rescind - Rescind offer handler.
  *
@@ -898,6 +1013,7 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
        struct vmbus_channel_rescind_offer *rescind;
        struct vmbus_channel *channel;
        struct device *dev;
+       bool clean_up_chan_for_suspend;
 
        rescind = (struct vmbus_channel_rescind_offer *)hdr;
 
@@ -937,6 +1053,8 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
                return;
        }
 
+       clean_up_chan_for_suspend = is_hvsock_channel(channel) ||
+                                   is_sub_channel(channel);
        /*
         * Before setting channel->rescind in vmbus_rescind_cleanup(), we
         * should make sure the channel callback is not running any more.
@@ -962,6 +1080,10 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
        if (channel->device_obj) {
                if (channel->chn_rescind_callback) {
                        channel->chn_rescind_callback(channel);
+
+                       if (clean_up_chan_for_suspend)
+                               check_ready_for_suspend_event();
+
                        return;
                }
                /*
@@ -994,6 +1116,11 @@ static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
                }
                mutex_unlock(&vmbus_connection.channel_mutex);
        }
+
+       /* The "channel" may have been freed. Do not access it any longer. */
+
+       if (clean_up_chan_for_suspend)
+               check_ready_for_suspend_event();
 }
 
 void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c
index 09829e15d4a0c9f5a773886a7995a86ff7054dff..6e4c015783ffca5ec063c59ff5a478d2dc93de46 100644
 struct vmbus_connection vmbus_connection = {
        .conn_state             = DISCONNECTED,
        .next_gpadl_handle      = ATOMIC_INIT(0xE1E10),
+
+       .ready_for_suspend_event= COMPLETION_INITIALIZER(
+                                 vmbus_connection.ready_for_suspend_event),
+       .ready_for_resume_event = COMPLETION_INITIALIZER(
+                                 vmbus_connection.ready_for_resume_event),
 };
 EXPORT_SYMBOL_GPL(vmbus_connection);
 
@@ -59,8 +64,7 @@ static __u32 vmbus_get_next_version(__u32 current_version)
        }
 }
 
-static int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo,
-                                       __u32 version)
+int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version)
 {
        int ret = 0;
        unsigned int cur_cpu;
diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
index 6188fb7dda4245e914a7fa46515f5b9161048b67..fcc52797c16935ea47cee18af55ae61de75681d4 100644
@@ -154,7 +154,7 @@ void hv_synic_free(void)
  * retrieve the initialized message and event pages.  Otherwise, we create and
  * initialize the message and event pages.
  */
-int hv_synic_init(unsigned int cpu)
+void hv_synic_enable_regs(unsigned int cpu)
 {
        struct hv_per_cpu_context *hv_cpu
                = per_cpu_ptr(hv_context.cpu_context, cpu);
@@ -196,6 +196,11 @@ int hv_synic_init(unsigned int cpu)
        sctrl.enable = 1;
 
        hv_set_synic_state(sctrl.as_uint64);
+}
+
+int hv_synic_init(unsigned int cpu)
+{
+       hv_synic_enable_regs(cpu);
 
        hv_stimer_init(cpu);
 
@@ -205,20 +210,45 @@ int hv_synic_init(unsigned int cpu)
 /*
  * hv_synic_cleanup - Cleanup routine for hv_synic_init().
  */
-int hv_synic_cleanup(unsigned int cpu)
+void hv_synic_disable_regs(unsigned int cpu)
 {
        union hv_synic_sint shared_sint;
        union hv_synic_simp simp;
        union hv_synic_siefp siefp;
        union hv_synic_scontrol sctrl;
+
+       hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
+
+       shared_sint.masked = 1;
+
+       /* Need to correctly cleanup in the case of SMP!!! */
+       /* Disable the interrupt */
+       hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
+
+       hv_get_simp(simp.as_uint64);
+       simp.simp_enabled = 0;
+       simp.base_simp_gpa = 0;
+
+       hv_set_simp(simp.as_uint64);
+
+       hv_get_siefp(siefp.as_uint64);
+       siefp.siefp_enabled = 0;
+       siefp.base_siefp_gpa = 0;
+
+       hv_set_siefp(siefp.as_uint64);
+
+       /* Disable the global synic bit */
+       hv_get_synic_state(sctrl.as_uint64);
+       sctrl.enable = 0;
+       hv_set_synic_state(sctrl.as_uint64);
+}
+
+int hv_synic_cleanup(unsigned int cpu)
+{
        struct vmbus_channel *channel, *sc;
        bool channel_found = false;
        unsigned long flags;
 
-       hv_get_synic_state(sctrl.as_uint64);
-       if (sctrl.enable != 1)
-               return -EFAULT;
-
        /*
         * Search for channels which are bound to the CPU we're about to
         * cleanup. In case we find one and vmbus is still connected we need to
@@ -249,29 +279,7 @@ int hv_synic_cleanup(unsigned int cpu)
 
        hv_stimer_cleanup(cpu);
 
-       hv_get_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
-
-       shared_sint.masked = 1;
-
-       /* Need to correctly cleanup in the case of SMP!!! */
-       /* Disable the interrupt */
-       hv_set_synint_state(VMBUS_MESSAGE_SINT, shared_sint.as_uint64);
-
-       hv_get_simp(simp.as_uint64);
-       simp.simp_enabled = 0;
-       simp.base_simp_gpa = 0;
-
-       hv_set_simp(simp.as_uint64);
-
-       hv_get_siefp(siefp.as_uint64);
-       siefp.siefp_enabled = 0;
-       siefp.base_siefp_gpa = 0;
-
-       hv_set_siefp(siefp.as_uint64);
-
-       /* Disable the global synic bit */
-       sctrl.enable = 0;
-       hv_set_synic_state(sctrl.as_uint64);
+       hv_synic_disable_regs(cpu);
 
        return 0;
 }
diff --git a/drivers/hv/hv_balloon.c b/drivers/hv/hv_balloon.c
index 6fb4ea5f03046a1ee875cac7873cace30fc0bf75..34bd73526afdcfed1857c3c33e731d68c720abad 100644
@@ -494,7 +494,7 @@ enum hv_dm_state {
 
 
 static __u8 recv_buffer[PAGE_SIZE];
-static __u8 *send_buffer;
+static __u8 balloon_up_send_buffer[PAGE_SIZE];
 #define PAGES_IN_2M    512
 #define HA_CHUNK (32 * 1024)
 
@@ -1292,8 +1292,8 @@ static void balloon_up(struct work_struct *dummy)
        }
 
        while (!done) {
-               bl_resp = (struct dm_balloon_response *)send_buffer;
-               memset(send_buffer, 0, PAGE_SIZE);
+               memset(balloon_up_send_buffer, 0, PAGE_SIZE);
+               bl_resp = (struct dm_balloon_response *)balloon_up_send_buffer;
                bl_resp->hdr.type = DM_BALLOON_RESPONSE;
                bl_resp->hdr.size = sizeof(struct dm_balloon_response);
                bl_resp->more_pages = 1;
@@ -1564,58 +1564,18 @@ static void balloon_onchannelcallback(void *context)
 
 }
 
-static int balloon_probe(struct hv_device *dev,
-                       const struct hv_vmbus_device_id *dev_id)
+static int balloon_connect_vsp(struct hv_device *dev)
 {
-       int ret;
-       unsigned long t;
        struct dm_version_request version_req;
        struct dm_capabilities cap_msg;
-
-#ifdef CONFIG_MEMORY_HOTPLUG
-       do_hot_add = hot_add;
-#else
-       do_hot_add = false;
-#endif
-
-       /*
-        * First allocate a send buffer.
-        */
-
-       send_buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
-       if (!send_buffer)
-               return -ENOMEM;
+       unsigned long t;
+       int ret;
 
        ret = vmbus_open(dev->channel, dm_ring_size, dm_ring_size, NULL, 0,
-                       balloon_onchannelcallback, dev);
-
+                        balloon_onchannelcallback, dev);
        if (ret)
-               goto probe_error0;
+               return ret;
 
-       dm_device.dev = dev;
-       dm_device.state = DM_INITIALIZING;
-       dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN8;
-       init_completion(&dm_device.host_event);
-       init_completion(&dm_device.config_event);
-       INIT_LIST_HEAD(&dm_device.ha_region_list);
-       spin_lock_init(&dm_device.ha_lock);
-       INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
-       INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req);
-       dm_device.host_specified_ha_region = false;
-
-       dm_device.thread =
-                kthread_run(dm_thread_func, &dm_device, "hv_balloon");
-       if (IS_ERR(dm_device.thread)) {
-               ret = PTR_ERR(dm_device.thread);
-               goto probe_error1;
-       }
-
-#ifdef CONFIG_MEMORY_HOTPLUG
-       set_online_page_callback(&hv_online_page);
-       register_memory_notifier(&hv_memory_nb);
-#endif
-
-       hv_set_drvdata(dev, &dm_device);
        /*
         * Initiate the hand shake with the host and negotiate
         * a version that the host can support. We start with the
@@ -1631,16 +1591,15 @@ static int balloon_probe(struct hv_device *dev,
        dm_device.version = version_req.version.version;
 
        ret = vmbus_sendpacket(dev->channel, &version_req,
-                               sizeof(struct dm_version_request),
-                               (unsigned long)NULL,
-                               VM_PKT_DATA_INBAND, 0);
+                              sizeof(struct dm_version_request),
+                              (unsigned long)NULL, VM_PKT_DATA_INBAND, 0);
        if (ret)
-               goto probe_error2;
+               goto out;
 
        t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ);
        if (t == 0) {
                ret = -ETIMEDOUT;
-               goto probe_error2;
+               goto out;
        }
 
        /*
@@ -1648,8 +1607,8 @@ static int balloon_probe(struct hv_device *dev,
         * fail the probe function.
         */
        if (dm_device.state == DM_INIT_ERROR) {
-               ret = -ETIMEDOUT;
-               goto probe_error2;
+               ret = -EPROTO;
+               goto out;
        }
 
        pr_info("Using Dynamic Memory protocol version %u.%u\n",
@@ -1682,16 +1641,15 @@ static int balloon_probe(struct hv_device *dev,
        cap_msg.max_page_number = -1;
 
        ret = vmbus_sendpacket(dev->channel, &cap_msg,
-                               sizeof(struct dm_capabilities),
-                               (unsigned long)NULL,
-                               VM_PKT_DATA_INBAND, 0);
+                              sizeof(struct dm_capabilities),
+                              (unsigned long)NULL, VM_PKT_DATA_INBAND, 0);
        if (ret)
-               goto probe_error2;
+               goto out;
 
        t = wait_for_completion_timeout(&dm_device.host_event, 5*HZ);
        if (t == 0) {
                ret = -ETIMEDOUT;
-               goto probe_error2;
+               goto out;
        }
 
        /*
@@ -1699,25 +1657,65 @@ static int balloon_probe(struct hv_device *dev,
         * fail the probe function.
         */
        if (dm_device.state == DM_INIT_ERROR) {
-               ret = -ETIMEDOUT;
-               goto probe_error2;
+               ret = -EPROTO;
+               goto out;
        }
 
+       return 0;
+out:
+       vmbus_close(dev->channel);
+       return ret;
+}
+
+static int balloon_probe(struct hv_device *dev,
+                        const struct hv_vmbus_device_id *dev_id)
+{
+       int ret;
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+       do_hot_add = hot_add;
+#else
+       do_hot_add = false;
+#endif
+       dm_device.dev = dev;
+       dm_device.state = DM_INITIALIZING;
+       dm_device.next_version = DYNMEM_PROTOCOL_VERSION_WIN8;
+       init_completion(&dm_device.host_event);
+       init_completion(&dm_device.config_event);
+       INIT_LIST_HEAD(&dm_device.ha_region_list);
+       spin_lock_init(&dm_device.ha_lock);
+       INIT_WORK(&dm_device.balloon_wrk.wrk, balloon_up);
+       INIT_WORK(&dm_device.ha_wrk.wrk, hot_add_req);
+       dm_device.host_specified_ha_region = false;
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+       set_online_page_callback(&hv_online_page);
+       register_memory_notifier(&hv_memory_nb);
+#endif
+
+       hv_set_drvdata(dev, &dm_device);
+
+       ret = balloon_connect_vsp(dev);
+       if (ret != 0)
+               return ret;
+
        dm_device.state = DM_INITIALIZED;
-       last_post_time = jiffies;
+
+       dm_device.thread =
+                kthread_run(dm_thread_func, &dm_device, "hv_balloon");
+       if (IS_ERR(dm_device.thread)) {
+               ret = PTR_ERR(dm_device.thread);
+               goto probe_error;
+       }
 
        return 0;
 
-probe_error2:
+probe_error:
+       vmbus_close(dev->channel);
 #ifdef CONFIG_MEMORY_HOTPLUG
+       unregister_memory_notifier(&hv_memory_nb);
        restore_online_page_callback(&hv_online_page);
 #endif
-       kthread_stop(dm_device.thread);
-
-probe_error1:
-       vmbus_close(dev->channel);
-probe_error0:
-       kfree(send_buffer);
        return ret;
 }
 
@@ -1734,12 +1732,11 @@ static int balloon_remove(struct hv_device *dev)
        cancel_work_sync(&dm->balloon_wrk.wrk);
        cancel_work_sync(&dm->ha_wrk.wrk);
 
-       vmbus_close(dev->channel);
        kthread_stop(dm->thread);
-       kfree(send_buffer);
+       vmbus_close(dev->channel);
 #ifdef CONFIG_MEMORY_HOTPLUG
-       restore_online_page_callback(&hv_online_page);
        unregister_memory_notifier(&hv_memory_nb);
+       restore_online_page_callback(&hv_online_page);
 #endif
        spin_lock_irqsave(&dm_device.ha_lock, flags);
        list_for_each_entry_safe(has, tmp, &dm->ha_region_list, list) {
diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h
index 50eaa1fd6e4599b43bf07dfa3408eff0b9f4562b..af9379a3bf8990fa5ee68f5f0fbddd9391eab3c2 100644
@@ -169,8 +169,10 @@ extern int hv_synic_alloc(void);
 
 extern void hv_synic_free(void);
 
+extern void hv_synic_enable_regs(unsigned int cpu);
 extern int hv_synic_init(unsigned int cpu);
 
+extern void hv_synic_disable_regs(unsigned int cpu);
 extern int hv_synic_cleanup(unsigned int cpu);
 
 /* Interface */
@@ -256,6 +258,32 @@ struct vmbus_connection {
        struct workqueue_struct *work_queue;
        struct workqueue_struct *handle_primary_chan_wq;
        struct workqueue_struct *handle_sub_chan_wq;
+
+       /*
+        * The number of sub-channels and hv_sock channels that should be
+        * cleaned up upon suspend: sub-channels will be re-created upon
+        * resume, and hv_sock channels should not survive suspend.
+        */
+       atomic_t nr_chan_close_on_suspend;
+       /*
+        * vmbus_bus_suspend() waits for "nr_chan_close_on_suspend" to
+        * drop to zero.
+        */
+       struct completion ready_for_suspend_event;
+
+       /*
+        * The number of primary channels that should be "fixed up"
+        * upon resume: these channels are re-offered upon resume, and some
+        * fields of the channel offers (i.e. child_relid and connection_id)
+        * can change, so the old offermsg must be fixed up, before the resume
+        * callbacks of the VSC drivers start to further touch the channels.
+        */
+       atomic_t nr_chan_fixup_on_resume;
+       /*
+        * vmbus_bus_resume() waits for "nr_chan_fixup_on_resume" to
+        * drop to zero.
+        */
+       struct completion ready_for_resume_event;
 };
 
 
@@ -270,6 +298,8 @@ struct vmbus_msginfo {
 
 extern struct vmbus_connection vmbus_connection;
 
+int vmbus_negotiate_version(struct vmbus_channel_msginfo *msginfo, u32 version);
+
 static inline void vmbus_send_interrupt(u32 relid)
 {
        sync_set_bit(relid, vmbus_connection.send_int_page);
diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c
index ebd35fc3529064e7ee0522273b5d4a1302561926..391f0b225c9ae4698c3cc3d9958b10873cf861a0 100644
 #include <linux/sched/task_stack.h>
 
 #include <asm/mshyperv.h>
+#include <linux/delay.h>
 #include <linux/notifier.h>
 #include <linux/ptrace.h>
 #include <linux/screen_info.h>
 #include <linux/kdebug.h>
 #include <linux/efi.h>
 #include <linux/random.h>
+#include <linux/syscore_ops.h>
 #include <clocksource/hyperv_timer.h>
 #include "hyperv_vmbus.h"
 
@@ -910,6 +912,43 @@ static void vmbus_shutdown(struct device *child_device)
                drv->shutdown(dev);
 }
 
+/*
+ * vmbus_suspend - Suspend a vmbus device
+ */
+static int vmbus_suspend(struct device *child_device)
+{
+       struct hv_driver *drv;
+       struct hv_device *dev = device_to_hv_device(child_device);
+
+       /* The device may not be attached yet */
+       if (!child_device->driver)
+               return 0;
+
+       drv = drv_to_hv_drv(child_device->driver);
+       if (!drv->suspend)
+               return -EOPNOTSUPP;
+
+       return drv->suspend(dev);
+}
+
+/*
+ * vmbus_resume - Resume a vmbus device
+ */
+static int vmbus_resume(struct device *child_device)
+{
+       struct hv_driver *drv;
+       struct hv_device *dev = device_to_hv_device(child_device);
+
+       /* The device may not be attached yet */
+       if (!child_device->driver)
+               return 0;
+
+       drv = drv_to_hv_drv(child_device->driver);
+       if (!drv->resume)
+               return -EOPNOTSUPP;
+
+       return drv->resume(dev);
+}
 
 /*
  * vmbus_device_release - Final callback release of the vmbus child device
@@ -925,6 +964,14 @@ static void vmbus_device_release(struct device *device)
        kfree(hv_dev);
 }
 
+/*
+ * Note: we must use SET_NOIRQ_SYSTEM_SLEEP_PM_OPS rather than
+ * SET_SYSTEM_SLEEP_PM_OPS: see the comment before vmbus_bus_pm.
+ */
+static const struct dev_pm_ops vmbus_pm = {
+       SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(vmbus_suspend, vmbus_resume)
+};
+
 /* The one and only one */
 static struct bus_type  hv_bus = {
        .name =         "vmbus",
@@ -935,6 +982,7 @@ static struct bus_type  hv_bus = {
        .uevent =               vmbus_uevent,
        .dev_groups =           vmbus_dev_groups,
        .drv_groups =           vmbus_drv_groups,
+       .pm =                   &vmbus_pm,
 };
 
 struct onmessage_work_context {
@@ -1022,6 +1070,41 @@ msg_handled:
        vmbus_signal_eom(msg, message_type);
 }
 
+/*
+ * Fake RESCIND_CHANNEL messages to clean up hv_sock channels by force for
+ * hibernation, because hv_sock connections can not persist across hibernation.
+ */
+static void vmbus_force_channel_rescinded(struct vmbus_channel *channel)
+{
+       struct onmessage_work_context *ctx;
+       struct vmbus_channel_rescind_offer *rescind;
+
+       WARN_ON(!is_hvsock_channel(channel));
+
+       /*
+        * sizeof(*ctx) is small and the allocation should really not fail,
+        * otherwise the state of the hv_sock connections ends up in limbo.
+        */
+       ctx = kzalloc(sizeof(*ctx), GFP_KERNEL | __GFP_NOFAIL);
+
+       /*
+        * So far, these are not really used by Linux. Just set them to the
+        * reasonable values conforming to the definitions of the fields.
+        */
+       ctx->msg.header.message_type = 1;
+       ctx->msg.header.payload_size = sizeof(*rescind);
+
+       /* These values are actually used by Linux. */
+       rescind = (struct vmbus_channel_rescind_offer *)ctx->msg.u.payload;
+       rescind->header.msgtype = CHANNELMSG_RESCIND_CHANNELOFFER;
+       rescind->child_relid = channel->offermsg.child_relid;
+
+       INIT_WORK(&ctx->work, vmbus_onmessage_work);
+
+       queue_work_on(vmbus_connection.connect_cpu,
+                     vmbus_connection.work_queue,
+                     &ctx->work);
+}
 
 /*
  * Direct callback for channels using other deferred processing
@@ -2042,6 +2125,129 @@ acpi_walk_err:
        return ret_val;
 }
 
+static int vmbus_bus_suspend(struct device *dev)
+{
+       struct vmbus_channel *channel, *sc;
+       unsigned long flags;
+
+       while (atomic_read(&vmbus_connection.offer_in_progress) != 0) {
+               /*
+                * We wait here until the completion of any channel
+                * offers that are currently in progress.
+                */
+               msleep(1);
+       }
+
+       mutex_lock(&vmbus_connection.channel_mutex);
+       list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
+               if (!is_hvsock_channel(channel))
+                       continue;
+
+               vmbus_force_channel_rescinded(channel);
+       }
+       mutex_unlock(&vmbus_connection.channel_mutex);
+
+       /*
+        * Wait until all the sub-channels and hv_sock channels have been
+        * cleaned up. Sub-channels should be destroyed upon suspend, otherwise
+        * they would conflict with the new sub-channels that will be created
+        * in the resume path. hv_sock channels should also be destroyed, but
+        * a hv_sock channel of an established hv_sock connection can not be
+        * really destroyed since it may still be referenced by the userspace
+        * application, so we just force the hv_sock channel to be rescinded
+        * by vmbus_force_channel_rescinded(), and the userspace application
+        * will thoroughly destroy the channel after hibernation.
+        *
+        * Note: the counter nr_chan_close_on_suspend may never go above 0 if
+        * the VM has no sub-channel and hv_sock channel, e.g. a 1-vCPU VM.
+        */
+       if (atomic_read(&vmbus_connection.nr_chan_close_on_suspend) > 0)
+               wait_for_completion(&vmbus_connection.ready_for_suspend_event);
+
+       WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) != 0);
+
+       mutex_lock(&vmbus_connection.channel_mutex);
+
+       list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
+               /*
+                * Invalidate the field. Upon resume, vmbus_onoffer() will fix
+                * up the field, and the other fields (if necessary).
+                */
+               channel->offermsg.child_relid = INVALID_RELID;
+
+               if (is_hvsock_channel(channel)) {
+                       if (!channel->rescind) {
+                               pr_err("hv_sock channel not rescinded!\n");
+                               WARN_ON_ONCE(1);
+                       }
+                       continue;
+               }
+
+               spin_lock_irqsave(&channel->lock, flags);
+               list_for_each_entry(sc, &channel->sc_list, sc_list) {
+                       pr_err("Sub-channel not deleted!\n");
+                       WARN_ON_ONCE(1);
+               }
+               spin_unlock_irqrestore(&channel->lock, flags);
+
+               atomic_inc(&vmbus_connection.nr_chan_fixup_on_resume);
+       }
+
+       mutex_unlock(&vmbus_connection.channel_mutex);
+
+       vmbus_initiate_unload(false);
+
+       vmbus_connection.conn_state = DISCONNECTED;
+
+       /* Reset the event for the next resume. */
+       reinit_completion(&vmbus_connection.ready_for_resume_event);
+
+       return 0;
+}
+
+static int vmbus_bus_resume(struct device *dev)
+{
+       struct vmbus_channel_msginfo *msginfo;
+       size_t msgsize;
+       int ret;
+
+       /*
+        * We only use the 'vmbus_proto_version', which was in use before
+        * hibernation, to re-negotiate with the host.
+        */
+       if (vmbus_proto_version == VERSION_INVAL ||
+           vmbus_proto_version == 0) {
+               pr_err("Invalid proto version = 0x%x\n", vmbus_proto_version);
+               return -EINVAL;
+       }
+
+       msgsize = sizeof(*msginfo) +
+                 sizeof(struct vmbus_channel_initiate_contact);
+
+       msginfo = kzalloc(msgsize, GFP_KERNEL);
+
+       if (msginfo == NULL)
+               return -ENOMEM;
+
+       ret = vmbus_negotiate_version(msginfo, vmbus_proto_version);
+
+       kfree(msginfo);
+
+       if (ret != 0)
+               return ret;
+
+       WARN_ON(atomic_read(&vmbus_connection.nr_chan_fixup_on_resume) == 0);
+
+       vmbus_request_offers();
+
+       wait_for_completion(&vmbus_connection.ready_for_resume_event);
+
+       /* Reset the event for the next suspend. */
+       reinit_completion(&vmbus_connection.ready_for_suspend_event);
+
+       return 0;
+}
+
 static const struct acpi_device_id vmbus_acpi_device_ids[] = {
        {"VMBUS", 0},
        {"VMBus", 0},
@@ -2049,6 +2255,19 @@ static const struct acpi_device_id vmbus_acpi_device_ids[] = {
 };
 MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids);
 
+/*
+ * Power-management callbacks of the vmbus bus device: vmbus_bus_suspend()
+ * and vmbus_bus_resume() are installed as the "noirq" system sleep handlers.
+ *
+ * Note: we must use SET_NOIRQ_SYSTEM_SLEEP_PM_OPS rather than
+ * SET_SYSTEM_SLEEP_PM_OPS, otherwise NIC SR-IOV cannot work, because the
+ * "pci_dev_pm_ops" uses the "noirq" callbacks: in the resume path, the
+ * pci "noirq" restore callback runs before "non-noirq" callbacks (see
+ * resume_target_kernel() -> dpm_resume_start(), and hibernation_restore() ->
+ * dpm_resume_end()). This means vmbus_bus_resume() and the pci-hyperv's
+ * resume callback must also run via the "noirq" callbacks.
+ */
+static const struct dev_pm_ops vmbus_bus_pm = {
+       SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(vmbus_bus_suspend, vmbus_bus_resume)
+};
+
 static struct acpi_driver vmbus_acpi_driver = {
        .name = "vmbus",
        .ids = vmbus_acpi_device_ids,
@@ -2056,6 +2275,7 @@ static struct acpi_driver vmbus_acpi_driver = {
                .add = vmbus_acpi_add,
                .remove = vmbus_acpi_remove,
        },
+       .drv.pm = &vmbus_bus_pm,
 };
 
 static void hv_kexec_handler(void)
@@ -2086,6 +2306,47 @@ static void hv_crash_handler(struct pt_regs *regs)
        hyperv_cleanup();
 };
 
+/*
+ * Syscore suspend hook: disable the synic registers by calling
+ * hv_synic_disable_regs(0).
+ *
+ * By the time this runs, all the non-boot CPUs have been offlined and the
+ * stimers on them have been unbound in hv_synic_cleanup() ->
+ * hv_stimer_cleanup() -> clockevents_unbind_device(). The function itself
+ * only runs on CPU0, with interrupts disabled.
+ *
+ * hv_stimer_cleanup(0) is intentionally not called here, because:
+ *   1) it is unnecessary: the interrupts remain disabled between
+ *      syscore_suspend() and syscore_resume(), see create_image() and
+ *      resume_target_kernel();
+ *   2) the stimer on CPU0 is automatically disabled later by
+ *      syscore_suspend() -> timekeeping_suspend() -> tick_suspend() -> ...
+ *      -> clockevents_shutdown() -> ... -> hv_ce_shutdown();
+ *   3) clockevents_unbind_device() may sleep, so calling it in this
+ *      interrupts-disabled context would trigger a warning.
+ *
+ * Always returns 0.
+ */
+static int hv_synic_suspend(void)
+{
+       hv_synic_disable_regs(0);
+
+       return 0;
+}
+
+/*
+ * Syscore resume hook: re-enable the synic registers by calling
+ * hv_synic_enable_regs(0).
+ *
+ * Note: hv_stimer_init(0) does not need to be called, because the timer on
+ * CPU0 is not unbound in hv_synic_suspend(), and the timer is automatically
+ * re-enabled in timekeeping_resume().
+ */
+static void hv_synic_resume(void)
+{
+       hv_synic_enable_regs(0);
+}
+
+/*
+ * Syscore operations that suspend and resume the synic via
+ * hv_synic_suspend() and hv_synic_resume(); registered in hv_acpi_init()
+ * and unregistered in vmbus_exit().
+ *
+ * The callbacks run only on CPU0, with irqs_disabled.
+ */
+static struct syscore_ops hv_synic_syscore_ops = {
+       .suspend = hv_synic_suspend,
+       .resume = hv_synic_resume,
+};
+
 static int __init hv_acpi_init(void)
 {
        int ret, t;
@@ -2116,6 +2377,8 @@ static int __init hv_acpi_init(void)
        hv_setup_kexec_handler(hv_kexec_handler);
        hv_setup_crash_handler(hv_crash_handler);
 
+       register_syscore_ops(&hv_synic_syscore_ops);
+
        return 0;
 
 cleanup:
@@ -2128,6 +2391,8 @@ static void __exit vmbus_exit(void)
 {
        int cpu;
 
+       unregister_syscore_ops(&hv_synic_syscore_ops);
+
        hv_remove_kexec_handler();
        hv_remove_crash_handler();
        vmbus_connection.conn_state = DISCONNECTED;
index 2afe6fdc1ddadc9e5b9b91bf75921d015d85f51c..b4a017093b697e615b6850860f9d58a8ed65193c 100644 (file)
@@ -245,7 +245,10 @@ struct vmbus_channel_offer {
                } pipe;
        } u;
        /*
-        * The sub_channel_index is defined in win8.
+        * The sub_channel_index is defined in Win8: a value of zero means a
+        * primary channel and a non-zero value means a sub-channel.
+        *
+        * Before Win8, the field is reserved, meaning it's always zero.
         */
        u16 sub_channel_index;
        u16 reserved3;
@@ -423,6 +426,9 @@ enum vmbus_channel_message_type {
        CHANNELMSG_COUNT
 };
 
+/*
+ * Marker for "no valid RELID". Hyper-V supports about 2048 channels, and
+ * the RELIDs start with 1, so U32_MAX is presumably never used as a real
+ * RELID.
+ */
+#define INVALID_RELID  U32_MAX
+
 struct vmbus_channel_message_header {
        enum vmbus_channel_message_type msgtype;
        u32 padding;
@@ -934,6 +940,11 @@ static inline bool is_hvsock_channel(const struct vmbus_channel *c)
                  VMBUS_CHANNEL_TLNPI_PROVIDER_OFFER);
 }
 
+/*
+ * is_sub_channel - test whether a channel is a sub-channel.
+ * @c: the channel to inspect.
+ *
+ * Return: true if the sub_channel_index in the channel's offer is non-zero,
+ * false if it is zero.
+ */
+static inline bool is_sub_channel(const struct vmbus_channel *c)
+{
+       if (c->offermsg.offer.sub_channel_index)
+               return true;
+
+       return false;
+}
+
 static inline void set_channel_affinity_state(struct vmbus_channel *c,
                                              enum hv_numa_policy policy)
 {
@@ -1149,6 +1160,9 @@ struct hv_driver {
        int (*remove)(struct hv_device *);
        void (*shutdown)(struct hv_device *);
 
+       int (*suspend)(struct hv_device *);
+       int (*resume)(struct hv_device *);
+
 };
 
 /* Base device object */
diff --git a/tools/hv/Build b/tools/hv/Build
new file mode 100644 (file)
index 0000000..6cf51fa
--- /dev/null
@@ -0,0 +1,3 @@
+hv_kvp_daemon-y += hv_kvp_daemon.o
+hv_vss_daemon-y += hv_vss_daemon.o
+hv_fcopy_daemon-y += hv_fcopy_daemon.o
index 5db5e62cebdaeb3277e927ec396abd3ca412218e..b57143d9459c1176fd31389caedcb4907ff29087 100644 (file)
@@ -1,28 +1,55 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for Hyper-V tools
-
-WARNINGS = -Wall -Wextra
-CFLAGS = $(WARNINGS) -g $(shell getconf LFS_CFLAGS)
-
-CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include
+include ../scripts/Makefile.include
 
 sbindir ?= /usr/sbin
 libexecdir ?= /usr/libexec
 sharedstatedir ?= /var/lib
 
-ALL_PROGRAMS := hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(CURDIR)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+endif
+
+# Do not use make's built-in rules
+# (this improves performance and avoids hard-to-debug behaviour);
+MAKEFLAGS += -r
+
+override CFLAGS += -O2 -Wall -g -D_GNU_SOURCE -I$(OUTPUT)include
+
+ALL_TARGETS := hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon
+ALL_PROGRAMS := $(patsubst %,$(OUTPUT)%,$(ALL_TARGETS))
 
 ALL_SCRIPTS := hv_get_dhcp_info.sh hv_get_dns_info.sh hv_set_ifconfig.sh
 
 all: $(ALL_PROGRAMS)
 
-%: %.c
-       $(CC) $(CFLAGS) -o $@ $^
+export srctree OUTPUT CC LD CFLAGS
+include $(srctree)/tools/build/Makefile.include
+
+# Each daemon is built in two steps: a sub-make invoked with $(build)=<name>
+# is the recipe for $(OUTPUT)<name>-in.o, and that object is then linked into
+# the final $(OUTPUT)<name> binary with $(CC).
+# The -in.o targets depend on FORCE (declared .PHONY below), so the sub-make
+# is run on every invocation.
+# NOTE(review): $(build) is presumably provided by the included
+# tools/build/Makefile.include -- confirm.
+HV_KVP_DAEMON_IN := $(OUTPUT)hv_kvp_daemon-in.o
+$(HV_KVP_DAEMON_IN): FORCE
+       $(Q)$(MAKE) $(build)=hv_kvp_daemon
+$(OUTPUT)hv_kvp_daemon: $(HV_KVP_DAEMON_IN)
+       $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+
+HV_VSS_DAEMON_IN := $(OUTPUT)hv_vss_daemon-in.o
+$(HV_VSS_DAEMON_IN): FORCE
+       $(Q)$(MAKE) $(build)=hv_vss_daemon
+$(OUTPUT)hv_vss_daemon: $(HV_VSS_DAEMON_IN)
+       $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
+
+HV_FCOPY_DAEMON_IN := $(OUTPUT)hv_fcopy_daemon-in.o
+$(HV_FCOPY_DAEMON_IN): FORCE
+       $(Q)$(MAKE) $(build)=hv_fcopy_daemon
+$(OUTPUT)hv_fcopy_daemon: $(HV_FCOPY_DAEMON_IN)
+       $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@
 
 clean:
-       $(RM) hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon
+       rm -f $(ALL_PROGRAMS)
+       find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.d' -delete
 
-install: all
+install: $(ALL_PROGRAMS)
        install -d -m 755 $(DESTDIR)$(sbindir); \
        install -d -m 755 $(DESTDIR)$(libexecdir)/hypervkvpd; \
        install -d -m 755 $(DESTDIR)$(sharedstatedir); \
@@ -33,3 +60,7 @@ install: all
        for script in $(ALL_SCRIPTS); do \
                install $$script -m 755 $(DESTDIR)$(libexecdir)/hypervkvpd/$${script%.sh}; \
        done
+
+FORCE:
+
+.PHONY: all install clean FORCE prepare