net/mlx5e: Replace PTP clock lock from RW lock to seq lock
author: Shay Agroskin <shayag@mellanox.com>
Tue, 5 Jun 2018 06:22:18 +0000 (09:22 +0300)
committer: Saeed Mahameed <saeedm@mellanox.com>
Thu, 6 Sep 2018 04:14:57 +0000 (21:14 -0700)
Changed the type of "priv.clock.lock" from 'rw_lock' to 'seq_lock'
in order to improve packet rate performance.

Tested on Intel(R) Xeon(R) CPU E5-2660 v2 @ 2.20GHz.
Sent 64-byte packets between two peers connected by ConnectX-5,
and measured the packet rate at the receiver in three modes:
 - no time-stamping (base rate)
 - time-stamping using rw_lock (old lock) for the critical region
 - time-stamping using seq_lock (new lock) for the critical region
Only the receiver time-stamped its packets.

The measured packet rate improvements are:

Single flow (multiple TX rings to single RX ring):
without timestamping:   4.26 (M packets)/sec
with rw-lock (old lock):  4.1  (M packets)/sec
with seq-lock (new lock): 4.16 (M packets)/sec
1.46% improvement

Multiple flows (multiple TX rings to six RX rings):
without timestamping:    22   (M packets)/sec
with rw-lock (old lock):  11.7 (M packets)/sec
with seq-lock (new lock): 21.3 (M packets)/sec
82.05% improvement

The packet rate improvement is due to the seq-lock requiring no atomic
operations from its 'readers'.
Since far more 'readers' than 'writers' contend on this lock,
almost all atomic operations are saved.
This results in a dramatic decrease in overall
cache misses.

Signed-off-by: Shay Agroskin <shayag@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
drivers/net/ethernet/mellanox/mlx5/core/lib/clock.h
include/linux/mlx5/driver.h

index 3f767cde4c1d50cbcd50d2eb670164fc20802983..0d90b1b4a3d388c2793de0a8f688c605f3c3abfd 100644 (file)
@@ -111,10 +111,10 @@ static void mlx5_pps_out(struct work_struct *work)
        for (i = 0; i < clock->ptp_info.n_pins; i++) {
                u64 tstart;
 
        for (i = 0; i < clock->ptp_info.n_pins; i++) {
                u64 tstart;
 
-               write_lock_irqsave(&clock->lock, flags);
+               write_seqlock_irqsave(&clock->lock, flags);
                tstart = clock->pps_info.start[i];
                clock->pps_info.start[i] = 0;
                tstart = clock->pps_info.start[i];
                clock->pps_info.start[i] = 0;
-               write_unlock_irqrestore(&clock->lock, flags);
+               write_sequnlock_irqrestore(&clock->lock, flags);
                if (!tstart)
                        continue;
 
                if (!tstart)
                        continue;
 
@@ -132,10 +132,10 @@ static void mlx5_timestamp_overflow(struct work_struct *work)
                                                overflow_work);
        unsigned long flags;
 
                                                overflow_work);
        unsigned long flags;
 
-       write_lock_irqsave(&clock->lock, flags);
+       write_seqlock_irqsave(&clock->lock, flags);
        timecounter_read(&clock->tc);
        mlx5_update_clock_info_page(clock->mdev);
        timecounter_read(&clock->tc);
        mlx5_update_clock_info_page(clock->mdev);
-       write_unlock_irqrestore(&clock->lock, flags);
+       write_sequnlock_irqrestore(&clock->lock, flags);
        schedule_delayed_work(&clock->overflow_work, clock->overflow_period);
 }
 
        schedule_delayed_work(&clock->overflow_work, clock->overflow_period);
 }
 
@@ -147,10 +147,10 @@ static int mlx5_ptp_settime(struct ptp_clock_info *ptp,
        u64 ns = timespec64_to_ns(ts);
        unsigned long flags;
 
        u64 ns = timespec64_to_ns(ts);
        unsigned long flags;
 
-       write_lock_irqsave(&clock->lock, flags);
+       write_seqlock_irqsave(&clock->lock, flags);
        timecounter_init(&clock->tc, &clock->cycles, ns);
        mlx5_update_clock_info_page(clock->mdev);
        timecounter_init(&clock->tc, &clock->cycles, ns);
        mlx5_update_clock_info_page(clock->mdev);
-       write_unlock_irqrestore(&clock->lock, flags);
+       write_sequnlock_irqrestore(&clock->lock, flags);
 
        return 0;
 }
 
        return 0;
 }
@@ -162,9 +162,9 @@ static int mlx5_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
        u64 ns;
        unsigned long flags;
 
        u64 ns;
        unsigned long flags;
 
-       write_lock_irqsave(&clock->lock, flags);
+       write_seqlock_irqsave(&clock->lock, flags);
        ns = timecounter_read(&clock->tc);
        ns = timecounter_read(&clock->tc);
-       write_unlock_irqrestore(&clock->lock, flags);
+       write_sequnlock_irqrestore(&clock->lock, flags);
 
        *ts = ns_to_timespec64(ns);
 
 
        *ts = ns_to_timespec64(ns);
 
@@ -177,10 +177,10 @@ static int mlx5_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
                                                ptp_info);
        unsigned long flags;
 
                                                ptp_info);
        unsigned long flags;
 
-       write_lock_irqsave(&clock->lock, flags);
+       write_seqlock_irqsave(&clock->lock, flags);
        timecounter_adjtime(&clock->tc, delta);
        mlx5_update_clock_info_page(clock->mdev);
        timecounter_adjtime(&clock->tc, delta);
        mlx5_update_clock_info_page(clock->mdev);
-       write_unlock_irqrestore(&clock->lock, flags);
+       write_sequnlock_irqrestore(&clock->lock, flags);
 
        return 0;
 }
 
        return 0;
 }
@@ -203,12 +203,12 @@ static int mlx5_ptp_adjfreq(struct ptp_clock_info *ptp, s32 delta)
        adj *= delta;
        diff = div_u64(adj, 1000000000ULL);
 
        adj *= delta;
        diff = div_u64(adj, 1000000000ULL);
 
-       write_lock_irqsave(&clock->lock, flags);
+       write_seqlock_irqsave(&clock->lock, flags);
        timecounter_read(&clock->tc);
        clock->cycles.mult = neg_adj ? clock->nominal_c_mult - diff :
                                       clock->nominal_c_mult + diff;
        mlx5_update_clock_info_page(clock->mdev);
        timecounter_read(&clock->tc);
        clock->cycles.mult = neg_adj ? clock->nominal_c_mult - diff :
                                       clock->nominal_c_mult + diff;
        mlx5_update_clock_info_page(clock->mdev);
-       write_unlock_irqrestore(&clock->lock, flags);
+       write_sequnlock_irqrestore(&clock->lock, flags);
 
        return 0;
 }
 
        return 0;
 }
@@ -307,12 +307,12 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp,
                ts.tv_nsec = rq->perout.start.nsec;
                ns = timespec64_to_ns(&ts);
                cycles_now = mlx5_read_internal_timer(mdev);
                ts.tv_nsec = rq->perout.start.nsec;
                ns = timespec64_to_ns(&ts);
                cycles_now = mlx5_read_internal_timer(mdev);
-               write_lock_irqsave(&clock->lock, flags);
+               write_seqlock_irqsave(&clock->lock, flags);
                nsec_now = timecounter_cyc2time(&clock->tc, cycles_now);
                nsec_delta = ns - nsec_now;
                cycles_delta = div64_u64(nsec_delta << clock->cycles.shift,
                                         clock->cycles.mult);
                nsec_now = timecounter_cyc2time(&clock->tc, cycles_now);
                nsec_delta = ns - nsec_now;
                cycles_delta = div64_u64(nsec_delta << clock->cycles.shift,
                                         clock->cycles.mult);
-               write_unlock_irqrestore(&clock->lock, flags);
+               write_sequnlock_irqrestore(&clock->lock, flags);
                time_stamp = cycles_now + cycles_delta;
                field_select = MLX5_MTPPS_FS_PIN_MODE |
                               MLX5_MTPPS_FS_PATTERN |
                time_stamp = cycles_now + cycles_delta;
                field_select = MLX5_MTPPS_FS_PIN_MODE |
                               MLX5_MTPPS_FS_PATTERN |
@@ -471,14 +471,14 @@ void mlx5_pps_event(struct mlx5_core_dev *mdev,
                ts.tv_sec += 1;
                ts.tv_nsec = 0;
                ns = timespec64_to_ns(&ts);
                ts.tv_sec += 1;
                ts.tv_nsec = 0;
                ns = timespec64_to_ns(&ts);
-               write_lock_irqsave(&clock->lock, flags);
+               write_seqlock_irqsave(&clock->lock, flags);
                nsec_now = timecounter_cyc2time(&clock->tc, cycles_now);
                nsec_delta = ns - nsec_now;
                cycles_delta = div64_u64(nsec_delta << clock->cycles.shift,
                                         clock->cycles.mult);
                clock->pps_info.start[pin] = cycles_now + cycles_delta;
                schedule_work(&clock->pps_info.out_work);
                nsec_now = timecounter_cyc2time(&clock->tc, cycles_now);
                nsec_delta = ns - nsec_now;
                cycles_delta = div64_u64(nsec_delta << clock->cycles.shift,
                                         clock->cycles.mult);
                clock->pps_info.start[pin] = cycles_now + cycles_delta;
                schedule_work(&clock->pps_info.out_work);
-               write_unlock_irqrestore(&clock->lock, flags);
+               write_sequnlock_irqrestore(&clock->lock, flags);
                break;
        default:
                mlx5_core_err(mdev, " Unhandled event\n");
                break;
        default:
                mlx5_core_err(mdev, " Unhandled event\n");
@@ -498,7 +498,7 @@ void mlx5_init_clock(struct mlx5_core_dev *mdev)
                mlx5_core_warn(mdev, "invalid device_frequency_khz, aborting HW clock init\n");
                return;
        }
                mlx5_core_warn(mdev, "invalid device_frequency_khz, aborting HW clock init\n");
                return;
        }
-       rwlock_init(&clock->lock);
+       seqlock_init(&clock->lock);
        clock->cycles.read = read_internal_timer;
        clock->cycles.shift = MLX5_CYCLES_SHIFT;
        clock->cycles.mult = clocksource_khz2mult(dev_freq,
        clock->cycles.read = read_internal_timer;
        clock->cycles.shift = MLX5_CYCLES_SHIFT;
        clock->cycles.mult = clocksource_khz2mult(dev_freq,
index 02e2e4575e4f88c1068a02709057b753b63a9485..263cb6e2aeee52e5bbdbd698ab3556531529a14a 100644 (file)
@@ -46,11 +46,13 @@ static inline int mlx5_clock_get_ptp_index(struct mlx5_core_dev *mdev)
 static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock,
                                                u64 timestamp)
 {
 static inline ktime_t mlx5_timecounter_cyc2time(struct mlx5_clock *clock,
                                                u64 timestamp)
 {
+       unsigned int seq;
        u64 nsec;
 
        u64 nsec;
 
-       read_lock(&clock->lock);
-       nsec = timecounter_cyc2time(&clock->tc, timestamp);
-       read_unlock(&clock->lock);
+       do {
+               seq = read_seqbegin(&clock->lock);
+               nsec = timecounter_cyc2time(&clock->tc, timestamp);
+       } while (read_seqretry(&clock->lock, seq));
 
        return ns_to_ktime(nsec);
 }
 
        return ns_to_ktime(nsec);
 }
index 2a0c845f6bdb261fcb4cf9064ff5089f1f86beae..b7fce2c9443dc830fff27bfca4c3ef89c494970f 100644 (file)
@@ -805,7 +805,7 @@ struct mlx5_pps {
 };
 
 struct mlx5_clock {
 };
 
 struct mlx5_clock {
-       rwlock_t                   lock;
+       seqlock_t                  lock;
        struct cyclecounter        cycles;
        struct timecounter         tc;
        struct hwtstamp_config     hwtstamp_config;
        struct cyclecounter        cycles;
        struct timecounter         tc;
        struct hwtstamp_config     hwtstamp_config;