Merge tag 'for-linus-5.0-rc3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 18 Jan 2019 17:53:41 +0000 (05:53 +1200)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 18 Jan 2019 17:53:41 +0000 (05:53 +1200)
Pull xen fixes from Juergen Gross:

 - Several fixes for the Xen pvcalls drivers (1 fix for the backend and
   8 for the frontend).

 - A fix for a rather longstanding bug in the Xen sched_clock()
   interface which led to weird time jumps when migrating the system.

 - A fix for avoiding accesses to x2apic MSRs in Xen PV guests.

* tag 'for-linus-5.0-rc3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
  xen: Fix x86 sched_clock() interface for xen
  pvcalls-front: fix potential null dereference
  always clear the X2APIC_ENABLE bit for PV guest
  pvcalls-front: Avoid get_free_pages(GFP_KERNEL) under spinlock
  xen/pvcalls: remove set but not used variable 'intf'
  pvcalls-back: set -ENOTCONN in pvcalls_conn_back_read
  pvcalls-front: don't return error when the ring is full
  pvcalls-front: properly allocate sk
  pvcalls-front: don't try to free unallocated rings
  pvcalls-front: read all data before closing the connection

arch/x86/xen/enlighten_pv.c
arch/x86/xen/time.c
drivers/xen/events/events_base.c
drivers/xen/pvcalls-back.c
drivers/xen/pvcalls-front.c

index 2f6787fc710660aae1598c8e245d04101f77cb18..c54a493e139a78e37eab27cc36556396303a390e 100644 (file)
@@ -898,10 +898,7 @@ static u64 xen_read_msr_safe(unsigned int msr, int *err)
        val = native_read_msr_safe(msr, err);
        switch (msr) {
        case MSR_IA32_APICBASE:
-#ifdef CONFIG_X86_X2APIC
-               if (!(cpuid_ecx(1) & (1 << (X86_FEATURE_X2APIC & 31))))
-#endif
-                       val &= ~X2APIC_ENABLE;
+               val &= ~X2APIC_ENABLE;
                break;
        }
        return val;
index 72bf446c3fee3456e9961c760a1b3d48520b1b37..6e29794573b72f18d9179a17d78d0981b84c5094 100644 (file)
@@ -361,8 +361,6 @@ void xen_timer_resume(void)
 {
        int cpu;
 
-       pvclock_resume();
-
        if (xen_clockevent != &xen_vcpuop_clockevent)
                return;
 
@@ -379,12 +377,15 @@ static const struct pv_time_ops xen_time_ops __initconst = {
 };
 
 static struct pvclock_vsyscall_time_info *xen_clock __read_mostly;
+static u64 xen_clock_value_saved;
 
 void xen_save_time_memory_area(void)
 {
        struct vcpu_register_time_memory_area t;
        int ret;
 
+       xen_clock_value_saved = xen_clocksource_read() - xen_sched_clock_offset;
+
        if (!xen_clock)
                return;
 
@@ -404,7 +405,7 @@ void xen_restore_time_memory_area(void)
        int ret;
 
        if (!xen_clock)
-               return;
+               goto out;
 
        t.addr.v = &xen_clock->pvti;
 
@@ -421,6 +422,11 @@ void xen_restore_time_memory_area(void)
        if (ret != 0)
                pr_notice("Cannot restore secondary vcpu_time_info (err %d)",
                          ret);
+
+out:
+       /* Need pvclock_resume() before using xen_clocksource_read(). */
+       pvclock_resume();
+       xen_sched_clock_offset = xen_clocksource_read() - xen_clock_value_saved;
 }
 
 static void xen_setup_vsyscall_time_info(void)
index 93194f3e75404f05655028531d5ae710a6d6db87..117e76b2f9391a1983a0c46b3276e7606412977e 100644 (file)
@@ -1650,7 +1650,7 @@ void xen_callback_vector(void)
                        xen_have_vector_callback = 0;
                        return;
                }
-               pr_info("Xen HVM callback vector for event delivery is enabled\n");
+               pr_info_once("Xen HVM callback vector for event delivery is enabled\n");
                alloc_intr_gate(HYPERVISOR_CALLBACK_VECTOR,
                                xen_hvm_callback_vector);
        }
index 2e5d845b5091478252dfb1ed17395cf9c5ce870a..7aa64d1b119c2c7b8c99efaae07bb65e1964db1f 100644 (file)
@@ -160,9 +160,10 @@ static void pvcalls_conn_back_read(void *opaque)
 
        /* write the data, then modify the indexes */
        virt_wmb();
-       if (ret < 0)
+       if (ret < 0) {
+               atomic_set(&map->read, 0);
                intf->in_error = ret;
-       else
+       } else
                intf->in_prod = prod + ret;
        /* update the indexes, then notify the other end */
        virt_wmb();
@@ -282,13 +283,11 @@ static int pvcalls_back_socket(struct xenbus_device *dev,
 static void pvcalls_sk_state_change(struct sock *sock)
 {
        struct sock_mapping *map = sock->sk_user_data;
-       struct pvcalls_data_intf *intf;
 
        if (map == NULL)
                return;
 
-       intf = map->ring;
-       intf->in_error = -ENOTCONN;
+       atomic_inc(&map->read);
        notify_remote_via_irq(map->irq);
 }
 
index 77224d8f3e6fe6ee17cb06f81f20be18069422a9..8a249c95c19342059d9eb06dcf1aeb74f4936458 100644 (file)
 #define PVCALLS_NR_RSP_PER_RING __CONST_RING_SIZE(xen_pvcalls, XEN_PAGE_SIZE)
 #define PVCALLS_FRONT_MAX_SPIN 5000
 
+static struct proto pvcalls_proto = {
+       .name   = "PVCalls",
+       .owner  = THIS_MODULE,
+       .obj_size = sizeof(struct sock),
+};
+
 struct pvcalls_bedata {
        struct xen_pvcalls_front_ring ring;
        grant_ref_t ref;
@@ -335,6 +341,42 @@ int pvcalls_front_socket(struct socket *sock)
        return ret;
 }
 
+static void free_active_ring(struct sock_mapping *map)
+{
+       if (!map->active.ring)
+               return;
+
+       free_pages((unsigned long)map->active.data.in,
+                       map->active.ring->ring_order);
+       free_page((unsigned long)map->active.ring);
+}
+
+static int alloc_active_ring(struct sock_mapping *map)
+{
+       void *bytes;
+
+       map->active.ring = (struct pvcalls_data_intf *)
+               get_zeroed_page(GFP_KERNEL);
+       if (!map->active.ring)
+               goto out;
+
+       map->active.ring->ring_order = PVCALLS_RING_ORDER;
+       bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+                                       PVCALLS_RING_ORDER);
+       if (!bytes)
+               goto out;
+
+       map->active.data.in = bytes;
+       map->active.data.out = bytes +
+               XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER);
+
+       return 0;
+
+out:
+       free_active_ring(map);
+       return -ENOMEM;
+}
+
 static int create_active(struct sock_mapping *map, int *evtchn)
 {
        void *bytes;
@@ -343,15 +385,7 @@ static int create_active(struct sock_mapping *map, int *evtchn)
        *evtchn = -1;
        init_waitqueue_head(&map->active.inflight_conn_req);
 
-       map->active.ring = (struct pvcalls_data_intf *)
-               __get_free_page(GFP_KERNEL | __GFP_ZERO);
-       if (map->active.ring == NULL)
-               goto out_error;
-       map->active.ring->ring_order = PVCALLS_RING_ORDER;
-       bytes = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
-                                       PVCALLS_RING_ORDER);
-       if (bytes == NULL)
-               goto out_error;
+       bytes = map->active.data.in;
        for (i = 0; i < (1 << PVCALLS_RING_ORDER); i++)
                map->active.ring->ref[i] = gnttab_grant_foreign_access(
                        pvcalls_front_dev->otherend_id,
@@ -361,10 +395,6 @@ static int create_active(struct sock_mapping *map, int *evtchn)
                pvcalls_front_dev->otherend_id,
                pfn_to_gfn(virt_to_pfn((void *)map->active.ring)), 0);
 
-       map->active.data.in = bytes;
-       map->active.data.out = bytes +
-               XEN_FLEX_RING_SIZE(PVCALLS_RING_ORDER);
-
        ret = xenbus_alloc_evtchn(pvcalls_front_dev, evtchn);
        if (ret)
                goto out_error;
@@ -385,8 +415,6 @@ static int create_active(struct sock_mapping *map, int *evtchn)
 out_error:
        if (*evtchn >= 0)
                xenbus_free_evtchn(pvcalls_front_dev, *evtchn);
-       free_pages((unsigned long)map->active.data.in, PVCALLS_RING_ORDER);
-       free_page((unsigned long)map->active.ring);
        return ret;
 }
 
@@ -406,17 +434,24 @@ int pvcalls_front_connect(struct socket *sock, struct sockaddr *addr,
                return PTR_ERR(map);
 
        bedata = dev_get_drvdata(&pvcalls_front_dev->dev);
+       ret = alloc_active_ring(map);
+       if (ret < 0) {
+               pvcalls_exit_sock(sock);
+               return ret;
+       }
 
        spin_lock(&bedata->socket_lock);
        ret = get_request(bedata, &req_id);
        if (ret < 0) {
                spin_unlock(&bedata->socket_lock);
+               free_active_ring(map);
                pvcalls_exit_sock(sock);
                return ret;
        }
        ret = create_active(map, &evtchn);
        if (ret < 0) {
                spin_unlock(&bedata->socket_lock);
+               free_active_ring(map);
                pvcalls_exit_sock(sock);
                return ret;
        }
@@ -469,8 +504,10 @@ static int __write_ring(struct pvcalls_data_intf *intf,
        virt_mb();
 
        size = pvcalls_queued(prod, cons, array_size);
-       if (size >= array_size)
+       if (size > array_size)
                return -EINVAL;
+       if (size == array_size)
+               return 0;
        if (len > array_size - size)
                len = array_size - size;
 
@@ -560,15 +597,13 @@ static int __read_ring(struct pvcalls_data_intf *intf,
        error = intf->in_error;
        /* get pointers before reading from the ring */
        virt_rmb();
-       if (error < 0)
-               return error;
 
        size = pvcalls_queued(prod, cons, array_size);
        masked_prod = pvcalls_mask(prod, array_size);
        masked_cons = pvcalls_mask(cons, array_size);
 
        if (size == 0)
-               return 0;
+               return error ?: size;
 
        if (len > size)
                len = size;
@@ -780,25 +815,36 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
                }
        }
 
-       spin_lock(&bedata->socket_lock);
-       ret = get_request(bedata, &req_id);
-       if (ret < 0) {
+       map2 = kzalloc(sizeof(*map2), GFP_KERNEL);
+       if (map2 == NULL) {
                clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
                          (void *)&map->passive.flags);
-               spin_unlock(&bedata->socket_lock);
+               pvcalls_exit_sock(sock);
+               return -ENOMEM;
+       }
+       ret = alloc_active_ring(map2);
+       if (ret < 0) {
+               clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
+                               (void *)&map->passive.flags);
+               kfree(map2);
                pvcalls_exit_sock(sock);
                return ret;
        }
-       map2 = kzalloc(sizeof(*map2), GFP_ATOMIC);
-       if (map2 == NULL) {
+       spin_lock(&bedata->socket_lock);
+       ret = get_request(bedata, &req_id);
+       if (ret < 0) {
                clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
                          (void *)&map->passive.flags);
                spin_unlock(&bedata->socket_lock);
+               free_active_ring(map2);
+               kfree(map2);
                pvcalls_exit_sock(sock);
-               return -ENOMEM;
+               return ret;
        }
+
        ret = create_active(map2, &evtchn);
        if (ret < 0) {
+               free_active_ring(map2);
                kfree(map2);
                clear_bit(PVCALLS_FLAG_ACCEPT_INFLIGHT,
                          (void *)&map->passive.flags);
@@ -839,7 +885,7 @@ int pvcalls_front_accept(struct socket *sock, struct socket *newsock, int flags)
 
 received:
        map2->sock = newsock;
-       newsock->sk = kzalloc(sizeof(*newsock->sk), GFP_KERNEL);
+       newsock->sk = sk_alloc(sock_net(sock->sk), PF_INET, GFP_KERNEL, &pvcalls_proto, false);
        if (!newsock->sk) {
                bedata->rsp[req_id].req_id = PVCALLS_INVALID_ID;
                map->passive.inflight_req_id = PVCALLS_INVALID_ID;
@@ -1032,8 +1078,8 @@ int pvcalls_front_release(struct socket *sock)
                spin_lock(&bedata->socket_lock);
                list_del(&map->list);
                spin_unlock(&bedata->socket_lock);
-               if (READ_ONCE(map->passive.inflight_req_id) !=
-                   PVCALLS_INVALID_ID) {
+               if (READ_ONCE(map->passive.inflight_req_id) != PVCALLS_INVALID_ID &&
+                       READ_ONCE(map->passive.inflight_req_id) != 0) {
                        pvcalls_front_free_map(bedata,
                                               map->passive.accept_map);
                }