mm: introduce Data Access MONitor (DAMON)

author SeongJae Park <sjpark@amazon.de>

Wed, 8 Sep 2021 02:56:28 +0000 (19:56 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Wed, 8 Sep 2021 18:50:24 +0000 (11:50 -0700)
author SeongJae Park <sjpark@amazon.de>
Wed, 8 Sep 2021 02:56:28 +0000 (19:56 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 8 Sep 2021 18:50:24 +0000 (11:50 -0700)
diff --git a/include/linux/damon.h b/include/linux/damon.h

new file mode 100644 (file)

index 0000000..2f65260
--- /dev/null
+++ b/include/linux/damon.h
@@ -0,0 +1,167 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * DAMON api
+ *
+ * Author: SeongJae Park <sjpark@amazon.de>
+ */
+
+#ifndef _DAMON_H_
+#define _DAMON_H_
+
+#include <linux/mutex.h>
+#include <linux/time64.h>
+#include <linux/types.h>
+
+struct damon_ctx;
+
+/**
+ * struct damon_primitive - Monitoring primitives for given use cases.
+ *
+ * @init:                      Initialize primitive-internal data structures.
+ * @update:                    Update primitive-internal data structures.
+ * @prepare_access_checks:     Prepare next access check of target regions.
+ * @check_accesses:            Check the accesses to target regions.
+ * @reset_aggregated:          Reset aggregated accesses monitoring results.
+ * @target_valid:              Determine if the target is valid.
+ * @cleanup:                   Clean up the context.
+ *
+ * DAMON can be extended for various address spaces and usages.  For this,
+ * users should register the low level primitives for their target address
+ * space and usecase via the &damon_ctx.primitive.  Then, the monitoring thread
+ * (&damon_ctx.kdamond) calls @init and @prepare_access_checks before starting
+ * the monitoring, @update after each &damon_ctx.primitive_update_interval, and
+ * @check_accesses, @target_valid and @prepare_access_checks after each
+ * &damon_ctx.sample_interval.  Finally, @reset_aggregated is called after each
+ * &damon_ctx.aggr_interval.
+ *
+ * @init should initialize primitive-internal data structures.  For example,
+ * this could be used to construct proper monitoring target regions and link
+ * those to &damon_ctx.target.
+ * @update should update the primitive-internal data structures.  For example,
+ * this could be used to update monitoring target regions for current status.
+ * @prepare_access_checks should manipulate the monitoring regions to be
+ * prepared for the next access check.
+ * @check_accesses should check the accesses to each region made after the
+ * last preparation and update the number of observed accesses of each region.
+ * @reset_aggregated should reset the access monitoring results aggregated
+ * by @check_accesses.
+ * @target_valid should check whether the target is still valid for the
+ * monitoring.
+ * @cleanup is called from &damon_ctx.kdamond just before its termination.
+ */
+struct damon_primitive {
+       void (*init)(struct damon_ctx *context);
+       void (*update)(struct damon_ctx *context);
+       void (*prepare_access_checks)(struct damon_ctx *context);
+       void (*check_accesses)(struct damon_ctx *context);
+       void (*reset_aggregated)(struct damon_ctx *context);
+       bool (*target_valid)(void *target);
+       void (*cleanup)(struct damon_ctx *context);
+};
+
+/**
+ * struct damon_callback - Monitoring events notification callbacks.
+ *
+ * @before_start:      Called before starting the monitoring.
+ * @after_sampling:    Called after each sampling.
+ * @after_aggregation: Called after each aggregation.
+ * @before_terminate:  Called before terminating the monitoring.
+ * @private:           User private data.
+ *
+ * The monitoring thread (&damon_ctx.kdamond) calls @before_start and
+ * @before_terminate just before starting and finishing the monitoring,
+ * respectively.  Therefore, those are good places for installing and cleaning
+ * @private.
+ *
+ * The monitoring thread calls @after_sampling and @after_aggregation for each
+ * of the sampling intervals and aggregation intervals, respectively.
+ * Therefore, users can safely access the monitoring results without additional
+ * protection.  For this reason, users are recommended to use these callbacks
+ * for the accesses to the results.
+ *
+ * If any callback returns non-zero, monitoring stops.
+ */
+struct damon_callback {
+       void *private;
+
+       int (*before_start)(struct damon_ctx *context);
+       int (*after_sampling)(struct damon_ctx *context);
+       int (*after_aggregation)(struct damon_ctx *context);
+       int (*before_terminate)(struct damon_ctx *context);
+};
+
+/**
+ * struct damon_ctx - Represents a context for each monitoring.  This is the
+ * main interface that allows users to set the attributes and get the results
+ * of the monitoring.
+ *
+ * @sample_interval:           The time between access samplings.
+ * @aggr_interval:             The time between monitor results aggregations.
+ * @primitive_update_interval: The time between monitoring primitive updates.
+ *
+ * For each @sample_interval, DAMON checks whether each region is accessed or
+ * not.  It aggregates and keeps the access information (number of accesses to
+ * each region) for @aggr_interval time.  DAMON also checks whether the target
+ * memory regions need update (e.g., by ``mmap()`` calls from the application,
+ * in case of virtual memory monitoring) and applies the changes for each
+ * @primitive_update_interval.  All time intervals are in microseconds.
+ * Please refer to &struct damon_primitive and &struct damon_callback for more
+ * detail.
+ *
+ * @kdamond:           Kernel thread that does the monitoring.
+ * @kdamond_stop:      Notifies whether kdamond should stop.
+ * @kdamond_lock:      Mutex for the synchronizations with @kdamond.
+ *
+ * For each monitoring context, one kernel thread for the monitoring is
+ * created.  The pointer to the thread is stored in @kdamond.
+ *
+ * Once started, the monitoring thread runs until explicitly required to be
+ * terminated or every monitoring target is invalid.  The validity of the
+ * targets is checked via the &damon_primitive.target_valid of @primitive.  The
+ * termination can also be explicitly requested by writing non-zero to
+ * @kdamond_stop.  The thread sets @kdamond to NULL when it terminates.
+ * Therefore, users can know whether the monitoring is ongoing or terminated by
+ * reading @kdamond.  Reads and writes to @kdamond and @kdamond_stop from
+ * outside of the monitoring thread must be protected by @kdamond_lock.
+ *
+ * Note that the monitoring thread protects only @kdamond and @kdamond_stop via
+ * @kdamond_lock.  Accesses to other fields must be protected by themselves.
+ *
+ * @primitive: Set of monitoring primitives for given use cases.
+ * @callback:  Set of callbacks for monitoring events notifications.
+ *
+ * @target:    Pointer to the user-defined monitoring target.
+ */
+struct damon_ctx {
+       unsigned long sample_interval;
+       unsigned long aggr_interval;
+       unsigned long primitive_update_interval;
+
+/* private: internal use only */
+       struct timespec64 last_aggregation;
+       struct timespec64 last_primitive_update;
+
+/* public: */
+       struct task_struct *kdamond;
+       bool kdamond_stop;
+       struct mutex kdamond_lock;
+
+       struct damon_primitive primitive;
+       struct damon_callback callback;
+
+       void *target;
+};
+
+#ifdef CONFIG_DAMON
+
+struct damon_ctx *damon_new_ctx(void);
+void damon_destroy_ctx(struct damon_ctx *ctx);
+int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
+               unsigned long aggr_int, unsigned long primitive_upd_int);
+
+int damon_start(struct damon_ctx **ctxs, int nr_ctxs);
+int damon_stop(struct damon_ctx **ctxs, int nr_ctxs);
+
+#endif /* CONFIG_DAMON */
+
+#endif /* _DAMON_H_ */
diff --git a/mm/Kconfig b/mm/Kconfig

index 14d5d2837737c81b227f19a8fbeb40671499eed0..8459167b0294df87925b9852165dd62281455878 100644 (file)
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -886,4 +886,6 @@ config IO_MAPPING
  config SECRETMEM
         def_bool ARCH_HAS_SET_DIRECT_MAP && !EMBEDDED
  
+source "mm/damon/Kconfig"
+
  endmenu
diff --git a/mm/Makefile b/mm/Makefile

index 0e0a5a6fe127b8ce294803f16501440fe504463d..fc60a40ce954bd911005e2c178dcbb4590bfe29c 100644 (file)
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -118,6 +118,7 @@ obj-$(CONFIG_CMA_SYSFS) += cma_sysfs.o
  obj-$(CONFIG_USERFAULTFD) += userfaultfd.o
  obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o
  obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o
+obj-$(CONFIG_DAMON) += damon/
  obj-$(CONFIG_HARDENED_USERCOPY) += usercopy.o
  obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o
  obj-$(CONFIG_ZONE_DEVICE) += memremap.o
diff --git a/mm/damon/Kconfig b/mm/damon/Kconfig

new file mode 100644 (file)

index 0000000..d00e99a
--- /dev/null
+++ b/mm/damon/Kconfig
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+menu "Data Access Monitoring"
+
+config DAMON
+       bool "DAMON: Data Access Monitoring Framework"
+       help
+         This builds a framework that allows kernel subsystems to monitor
+         access frequency of each memory region. The information can be useful
+         for performance-centric DRAM level memory management.
+
+         See https://damonitor.github.io/doc/html/latest-damon/index.html for
+         more information.
+
+endmenu
diff --git a/mm/damon/Makefile b/mm/damon/Makefile

new file mode 100644 (file)

index 0000000..4fd2edb
--- /dev/null
+++ b/mm/damon/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_DAMON)            := core.o
diff --git a/mm/damon/core.c b/mm/damon/core.c

new file mode 100644 (file)

index 0000000..651590b
--- /dev/null
+++ b/mm/damon/core.c
@@ -0,0 +1,320 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Data Access Monitor
+ *
+ * Author: SeongJae Park <sjpark@amazon.de>
+ */
+
+#define pr_fmt(fmt) "damon: " fmt
+
+#include <linux/damon.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/slab.h>
+
+static DEFINE_MUTEX(damon_lock);       /* protects nr_running_ctxs */
+static int nr_running_ctxs;            /* kdamonds started and not yet exited */
+
+struct damon_ctx *damon_new_ctx(void)
+{
+       struct damon_ctx *new_ctx = kzalloc(sizeof(*new_ctx), GFP_KERNEL);
+
+       if (!new_ctx)
+               return NULL;
+
+       /* Default intervals, in microseconds: 5 ms, 100 ms and 60 s */
+       new_ctx->sample_interval = 5 * 1000;
+       new_ctx->aggr_interval = 100 * 1000;
+       new_ctx->primitive_update_interval = 60 * 1000 * 1000;
+
+       /* Both interval baselines start from the same timestamp */
+       ktime_get_coarse_ts64(&new_ctx->last_aggregation);
+       new_ctx->last_primitive_update = new_ctx->last_aggregation;
+
+       mutex_init(&new_ctx->kdamond_lock);
+       new_ctx->target = NULL;
+
+       return new_ctx;
+}
+
+void damon_destroy_ctx(struct damon_ctx *ctx)
+{
+       if (ctx->primitive.cleanup)     /* the cleanup callback is optional */
+               ctx->primitive.cleanup(ctx);
+       kfree(ctx);     /* note: does not stop a running kdamond first */
+}
+
+/**
+ * damon_set_attrs() - Set the monitoring intervals of a context.
+ * @ctx:               monitoring context to update
+ * @sample_int:                sampling interval
+ * @aggr_int:          aggregation interval
+ * @primitive_upd_int: monitoring primitive update interval
+ *
+ * All intervals are in microseconds.  Callers must not invoke this function
+ * while the kdamond of @ctx is running.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
+                   unsigned long aggr_int, unsigned long primitive_upd_int)
+{
+       ctx->primitive_update_interval = primitive_upd_int;
+       ctx->aggr_interval = aggr_int;
+       ctx->sample_interval = sample_int;
+
+       return 0;
+}
+
+static bool damon_kdamond_running(struct damon_ctx *ctx)
+{
+       struct task_struct *kdamond;
+
+       mutex_lock(&ctx->kdamond_lock);
+       kdamond = ctx->kdamond;         /* snapshot taken under its lock */
+       mutex_unlock(&ctx->kdamond_lock);
+
+       return kdamond != NULL;
+}
+
+static int kdamond_fn(void *data);
+
+/*
+ * __damon_start() - Starts monitoring with given context.
+ * @ctx:       monitoring context
+ *
+ * This function should be called while damon_lock is held.
+ *
+ * Return: 0 on success, -EBUSY if already running, or kthread_run()'s error.
+ */
+static int __damon_start(struct damon_ctx *ctx)
+{
+       int err = -EBUSY;
+
+       mutex_lock(&ctx->kdamond_lock);
+       if (!ctx->kdamond) {
+               err = 0;
+               ctx->kdamond_stop = false;
+               ctx->kdamond = kthread_run(kdamond_fn, ctx, "kdamond.%d",
+                               nr_running_ctxs);
+               if (IS_ERR(ctx->kdamond)) {
+                       err = PTR_ERR(ctx->kdamond);
+                       ctx->kdamond = NULL;    /* NULL means "not running" */
+               }
+       }
+       mutex_unlock(&ctx->kdamond_lock);
+
+       return err;
+}
+
+/**
+ * damon_start() - Starts the monitorings for a given group of contexts.
+ * @ctxs:      an array of the pointers for contexts to start monitoring
+ * @nr_ctxs:   size of @ctxs
+ *
+ * This function starts a group of monitoring threads for a group of monitoring
+ * contexts.  One thread per context is created and run in parallel.  The
+ * caller should handle synchronization between the threads by itself.  If a
+ * group of threads created by another damon_start() call is still running,
+ * this function does nothing but returns -EBUSY.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int damon_start(struct damon_ctx **ctxs, int nr_ctxs)
+{
+       int i;
+       int err = 0;
+
+       mutex_lock(&damon_lock);
+       if (nr_running_ctxs) {
+               mutex_unlock(&damon_lock);
+               return -EBUSY;
+       }
+
+       for (i = 0; i < nr_ctxs; i++) {
+               err = __damon_start(ctxs[i]);
+               if (err)
+                       break;          /* earlier contexts keep running */
+               nr_running_ctxs++;
+       }
+       mutex_unlock(&damon_lock);
+
+       return err;
+}
+
+/*
+ * __damon_stop() - Stops monitoring of given context.
+ * @ctx:       monitoring context
+ *
+ * Return: 0 once ctx->kdamond is NULL again, -EPERM if it was not running.
+ */
+static int __damon_stop(struct damon_ctx *ctx)
+{
+       mutex_lock(&ctx->kdamond_lock);
+       if (ctx->kdamond) {
+               ctx->kdamond_stop = true;
+               mutex_unlock(&ctx->kdamond_lock);
+               while (damon_kdamond_running(ctx))      /* poll for the exit */
+                       usleep_range(ctx->sample_interval,
+                                       ctx->sample_interval * 2);
+               return 0;
+       }
+       mutex_unlock(&ctx->kdamond_lock);
+
+       return -EPERM;
+}
+
+/**
+ * damon_stop() - Stops the monitorings for a given group of contexts.
+ * @ctxs:      an array of the pointers for contexts to stop monitoring
+ * @nr_ctxs:   size of @ctxs
+ *
+ * Return: 0 on success, or the error of the first context that failed to stop.
+ */
+int damon_stop(struct damon_ctx **ctxs, int nr_ctxs)
+{
+       int i, err = 0;
+
+       for (i = 0; i < nr_ctxs; i++) {
+               /* nr_running_ctxs is decremented in kdamond_fn */
+               err = __damon_stop(ctxs[i]);
+               if (err)
+                       return err;     /* later contexts are left untouched */
+       }
+
+       return err;
+}
+
+/*
+ * damon_check_reset_time_interval() - Check if a time interval is elapsed.
+ * @baseline:  the time to check whether the interval has elapsed since
+ * @interval:  the time interval (microseconds)
+ *
+ * See whether the given time interval has passed since the given baseline
+ * time.  If so, it also updates the baseline to current time for next check.
+ *
+ * Return:     true if the time interval has passed, or false otherwise.
+ */
+static bool damon_check_reset_time_interval(struct timespec64 *baseline,
+               unsigned long interval)
+{
+       struct timespec64 now;
+
+       ktime_get_coarse_ts64(&now);
+       if ((timespec64_to_ns(&now) - timespec64_to_ns(baseline)) <
+                       (u64)interval * NSEC_PER_USEC) /* no 32-bit wrap */
+               return false;
+       *baseline = now;
+       return true;
+}
+
+/*
+ * Check whether aggr_interval has elapsed; on true the interval is restarted
+ */
+static bool kdamond_aggregate_interval_passed(struct damon_ctx *ctx)
+{
+       return damon_check_reset_time_interval(&ctx->last_aggregation,
+                       ctx->aggr_interval);
+}
+
+/*
+ * Check whether it is time to update the monitoring primitive
+ *
+ * Returns true if it is; the update interval is then restarted.
+ */
+static bool kdamond_need_update_primitive(struct damon_ctx *ctx)
+{
+       return damon_check_reset_time_interval(&ctx->last_primitive_update,
+                       ctx->primitive_update_interval);
+}
+
+/*
+ * Decide whether the kdamond of @ctx should leave its monitoring loop
+ *
+ * That is the case when a stop was requested via @ctx->kdamond_stop, or when
+ * the primitive's target_valid() callback reports the target as invalid.
+ *
+ * Returns true if the monitoring should stop, false otherwise.
+ */
+static bool kdamond_need_stop(struct damon_ctx *ctx)
+{
+       bool stop_requested;
+
+       mutex_lock(&ctx->kdamond_lock);
+       stop_requested = ctx->kdamond_stop;
+       mutex_unlock(&ctx->kdamond_lock);
+       if (stop_requested)
+               return true;
+
+       /* Without a validity callback the target is treated as valid */
+       if (ctx->primitive.target_valid)
+               return !ctx->primitive.target_valid(ctx->target);
+       return false;
+}
+
+static void set_kdamond_stop(struct damon_ctx *ctx)
+{
+       mutex_lock(&ctx->kdamond_lock);
+       ctx->kdamond_stop = true;       /* read back by kdamond_need_stop() */
+       mutex_unlock(&ctx->kdamond_lock);
+}
+
+/*
+ * The monitoring daemon (kdamond) kernel thread; @data is its struct damon_ctx
+ */
+static int kdamond_fn(void *data)
+{
+       struct damon_ctx *ctx = (struct damon_ctx *)data;
+
+       mutex_lock(&ctx->kdamond_lock);         /* ->kdamond is set under it */
+       pr_info("kdamond (%d) starts\n", ctx->kdamond->pid);
+       mutex_unlock(&ctx->kdamond_lock);
+
+       if (ctx->primitive.init)
+               ctx->primitive.init(ctx);
+       if (ctx->callback.before_start && ctx->callback.before_start(ctx))
+               set_kdamond_stop(ctx);  /* non-zero return: skip the loop */
+
+       while (!kdamond_need_stop(ctx)) {
+               if (ctx->primitive.prepare_access_checks)
+                       ctx->primitive.prepare_access_checks(ctx);
+               if (ctx->callback.after_sampling &&
+                               ctx->callback.after_sampling(ctx))
+                       set_kdamond_stop(ctx);
+               /* Sleep one sampling interval before checking the accesses */
+               usleep_range(ctx->sample_interval, ctx->sample_interval + 1);
+
+               if (ctx->primitive.check_accesses)
+                       ctx->primitive.check_accesses(ctx);
+
+               if (kdamond_aggregate_interval_passed(ctx)) {
+                       if (ctx->callback.after_aggregation &&
+                                       ctx->callback.after_aggregation(ctx))
+                               set_kdamond_stop(ctx);
+                       if (ctx->primitive.reset_aggregated)
+                               ctx->primitive.reset_aggregated(ctx);
+               }
+
+               if (kdamond_need_update_primitive(ctx)) {
+                       if (ctx->primitive.update)
+                               ctx->primitive.update(ctx);
+               }
+       }
+
+       if (ctx->callback.before_terminate &&
+                       ctx->callback.before_terminate(ctx))
+               set_kdamond_stop(ctx);
+       if (ctx->primitive.cleanup)
+               ctx->primitive.cleanup(ctx);
+
+       pr_debug("kdamond (%d) finishes\n", ctx->kdamond->pid);
+       mutex_lock(&ctx->kdamond_lock);
+       ctx->kdamond = NULL;    /* what damon_kdamond_running() polls for */
+       mutex_unlock(&ctx->kdamond_lock);
+
+       mutex_lock(&damon_lock);
+       nr_running_ctxs--;      /* damon_start() refuses while non-zero */
+       mutex_unlock(&damon_lock);
+
+       do_exit(0);
+}
author	SeongJae Park <sjpark@amazon.de>
	Wed, 8 Sep 2021 02:56:28 +0000 (19:56 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 8 Sep 2021 18:50:24 +0000 (11:50 -0700)
include/linux/damon.h	[new file with mode: 0644]	patch \| blob
mm/Kconfig		patch \| blob \| history
mm/Makefile		patch \| blob \| history
mm/damon/Kconfig	[new file with mode: 0644]	patch \| blob
mm/damon/Makefile	[new file with mode: 0644]	patch \| blob
mm/damon/core.c	[new file with mode: 0644]	patch \| blob