}
stock->nr_pages += nr_pages;
+ if (stock->nr_pages > CHARGE_BATCH)
+ drain_stock(stock);
+
local_irq_restore(flags);
}
MC_TARGET_NONE = 0,
MC_TARGET_PAGE,
MC_TARGET_SWAP,
+ MC_TARGET_DEVICE,
};
static struct page *mc_handle_present_pte(struct vm_area_struct *vma,
unsigned long addr, pte_t ptent)
{
- struct page *page = vm_normal_page(vma, addr, ptent);
+ struct page *page = _vm_normal_page(vma, addr, ptent, true);
if (!page || !page_mapped(page))
return NULL;
return page;
}
-#ifdef CONFIG_SWAP
+#if defined(CONFIG_SWAP) || defined(CONFIG_DEVICE_PRIVATE)
static struct page *mc_handle_swap_pte(struct vm_area_struct *vma,
pte_t ptent, swp_entry_t *entry)
{
if (!(mc.flags & MOVE_ANON) || non_swap_entry(ent))
return NULL;
+
+ /*
+ * Handle MEMORY_DEVICE_PRIVATE which are ZONE_DEVICE page belonging to
+ * a device and because they are not accessible by CPU they are store
+ * as special swap entry in the CPU page table.
+ */
+ if (is_device_private_entry(ent)) {
+ page = device_private_entry_to_page(ent);
+ /*
+ * MEMORY_DEVICE_PRIVATE means ZONE_DEVICE page and which have
+ * a refcount of 1 when free (unlike normal page)
+ */
+ if (!page_ref_add_unless(page, 1, 1))
+ return NULL;
+ return page;
+ }
+
/*
* Because lookup_swap_cache() updates some statistics counter,
* we call find_get_page() with swapper_space directly.
* 2(MC_TARGET_SWAP): if the swap entry corresponding to this pte is a
* target for charge migration. if @target is not NULL, the entry is stored
* in target->ent.
+ * 3(MC_TARGET_DEVICE): like MC_TARGET_PAGE but page is MEMORY_DEVICE_PUBLIC
+ * or MEMORY_DEVICE_PRIVATE (so ZONE_DEVICE page and thus not on the lru).
+ * For now we such page is charge like a regular page would be as for all
+ * intent and purposes it is just special memory taking the place of a
+ * regular page.
+ *
+ * See Documentations/vm/hmm.txt and include/linux/hmm.h
*
* Called with pte lock held.
*/
*/
if (page->mem_cgroup == mc.from) {
ret = MC_TARGET_PAGE;
+ if (is_device_private_page(page) ||
+ is_device_public_page(page))
+ ret = MC_TARGET_DEVICE;
if (target)
target->page = page;
}
struct page *page = NULL;
enum mc_target_type ret = MC_TARGET_NONE;
+ if (unlikely(is_swap_pmd(pmd))) {
+ VM_BUG_ON(thp_migration_supported() &&
+ !is_pmd_migration_entry(pmd));
+ return ret;
+ }
page = pmd_page(pmd);
VM_BUG_ON_PAGE(!page || !PageHead(page), page);
if (!(mc.flags & MOVE_ANON))
ptl = pmd_trans_huge_lock(pmd, vma);
if (ptl) {
+ /*
+ * Note their can not be MC_TARGET_DEVICE for now as we do not
+ * support transparent huge page with MEMORY_DEVICE_PUBLIC or
+ * MEMORY_DEVICE_PRIVATE but this might change.
+ */
if (get_mctgt_type_thp(vma, addr, *pmd, NULL) == MC_TARGET_PAGE)
mc.precharge += HPAGE_PMD_NR;
spin_unlock(ptl);
putback_lru_page(page);
}
put_page(page);
+ } else if (target_type == MC_TARGET_DEVICE) {
+ page = target.page;
+ if (!mem_cgroup_move_account(page, true,
+ mc.from, mc.to)) {
+ mc.precharge -= HPAGE_PMD_NR;
+ mc.moved_charge += HPAGE_PMD_NR;
+ }
+ put_page(page);
}
spin_unlock(ptl);
return 0;
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
for (; addr != end; addr += PAGE_SIZE) {
pte_t ptent = *(pte++);
+ bool device = false;
swp_entry_t ent;
if (!mc.precharge)
break;
switch (get_mctgt_type(vma, addr, ptent, &target)) {
+ case MC_TARGET_DEVICE:
+ device = true;
+ /* fall through */
case MC_TARGET_PAGE:
page = target.page;
/*
*/
if (PageTransCompound(page))
goto put;
- if (isolate_lru_page(page))
+ if (!device && isolate_lru_page(page))
goto put;
if (!mem_cgroup_move_account(page, false,
mc.from, mc.to)) {
/* we uncharge from mc.from later. */
mc.moved_charge++;
}
- putback_lru_page(page);
+ if (!device)
+ putback_lru_page(page);
put: /* get_mctgt_type() gets the page */
put_page(page);
break;
cancel_charge(memcg, nr_pages);
}
-static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
- unsigned long nr_anon, unsigned long nr_file,
- unsigned long nr_kmem, unsigned long nr_huge,
- unsigned long nr_shmem, struct page *dummy_page)
+struct uncharge_gather {
+ struct mem_cgroup *memcg;
+ unsigned long pgpgout;
+ unsigned long nr_anon;
+ unsigned long nr_file;
+ unsigned long nr_kmem;
+ unsigned long nr_huge;
+ unsigned long nr_shmem;
+ struct page *dummy_page;
+};
+
+static inline void uncharge_gather_clear(struct uncharge_gather *ug)
+{
+ memset(ug, 0, sizeof(*ug));
+}
+
+static void uncharge_batch(const struct uncharge_gather *ug)
{
- unsigned long nr_pages = nr_anon + nr_file + nr_kmem;
+ unsigned long nr_pages = ug->nr_anon + ug->nr_file + ug->nr_kmem;
unsigned long flags;
- if (!mem_cgroup_is_root(memcg)) {
- page_counter_uncharge(&memcg->memory, nr_pages);
+ if (!mem_cgroup_is_root(ug->memcg)) {
+ page_counter_uncharge(&ug->memcg->memory, nr_pages);
if (do_memsw_account())
- page_counter_uncharge(&memcg->memsw, nr_pages);
- if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && nr_kmem)
- page_counter_uncharge(&memcg->kmem, nr_kmem);
- memcg_oom_recover(memcg);
+ page_counter_uncharge(&ug->memcg->memsw, nr_pages);
+ if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && ug->nr_kmem)
+ page_counter_uncharge(&ug->memcg->kmem, ug->nr_kmem);
+ memcg_oom_recover(ug->memcg);
}
local_irq_save(flags);
- __this_cpu_sub(memcg->stat->count[MEMCG_RSS], nr_anon);
- __this_cpu_sub(memcg->stat->count[MEMCG_CACHE], nr_file);
- __this_cpu_sub(memcg->stat->count[MEMCG_RSS_HUGE], nr_huge);
- __this_cpu_sub(memcg->stat->count[NR_SHMEM], nr_shmem);
- __this_cpu_add(memcg->stat->events[PGPGOUT], pgpgout);
- __this_cpu_add(memcg->stat->nr_page_events, nr_pages);
- memcg_check_events(memcg, dummy_page);
+ __this_cpu_sub(ug->memcg->stat->count[MEMCG_RSS], ug->nr_anon);
+ __this_cpu_sub(ug->memcg->stat->count[MEMCG_CACHE], ug->nr_file);
+ __this_cpu_sub(ug->memcg->stat->count[MEMCG_RSS_HUGE], ug->nr_huge);
+ __this_cpu_sub(ug->memcg->stat->count[NR_SHMEM], ug->nr_shmem);
+ __this_cpu_add(ug->memcg->stat->events[PGPGOUT], ug->pgpgout);
+ __this_cpu_add(ug->memcg->stat->nr_page_events, nr_pages);
+ memcg_check_events(ug->memcg, ug->dummy_page);
local_irq_restore(flags);
- if (!mem_cgroup_is_root(memcg))
- css_put_many(&memcg->css, nr_pages);
+ if (!mem_cgroup_is_root(ug->memcg))
+ css_put_many(&ug->memcg->css, nr_pages);
+}
+
+static void uncharge_page(struct page *page, struct uncharge_gather *ug)
+{
+ VM_BUG_ON_PAGE(PageLRU(page), page);
+ VM_BUG_ON_PAGE(!PageHWPoison(page) && page_count(page), page);
+
+ if (!page->mem_cgroup)
+ return;
+
+ /*
+ * Nobody should be changing or seriously looking at
+ * page->mem_cgroup at this point, we have fully
+ * exclusive access to the page.
+ */
+
+ if (ug->memcg != page->mem_cgroup) {
+ if (ug->memcg) {
+ uncharge_batch(ug);
+ uncharge_gather_clear(ug);
+ }
+ ug->memcg = page->mem_cgroup;
+ }
+
+ if (!PageKmemcg(page)) {
+ unsigned int nr_pages = 1;
+
+ if (PageTransHuge(page)) {
+ nr_pages <<= compound_order(page);
+ ug->nr_huge += nr_pages;
+ }
+ if (PageAnon(page))
+ ug->nr_anon += nr_pages;
+ else {
+ ug->nr_file += nr_pages;
+ if (PageSwapBacked(page))
+ ug->nr_shmem += nr_pages;
+ }
+ ug->pgpgout++;
+ } else {
+ ug->nr_kmem += 1 << compound_order(page);
+ __ClearPageKmemcg(page);
+ }
+
+ ug->dummy_page = page;
+ page->mem_cgroup = NULL;
}
static void uncharge_list(struct list_head *page_list)
{
- struct mem_cgroup *memcg = NULL;
- unsigned long nr_shmem = 0;
- unsigned long nr_anon = 0;
- unsigned long nr_file = 0;
- unsigned long nr_huge = 0;
- unsigned long nr_kmem = 0;
- unsigned long pgpgout = 0;
+ struct uncharge_gather ug;
struct list_head *next;
- struct page *page;
+
+ uncharge_gather_clear(&ug);
/*
* Note that the list can be a single page->lru; hence the
*/
next = page_list->next;
do {
+ struct page *page;
+
page = list_entry(next, struct page, lru);
next = page->lru.next;
- VM_BUG_ON_PAGE(PageLRU(page), page);
- VM_BUG_ON_PAGE(!PageHWPoison(page) && page_count(page), page);
-
- if (!page->mem_cgroup)
- continue;
-
- /*
- * Nobody should be changing or seriously looking at
- * page->mem_cgroup at this point, we have fully
- * exclusive access to the page.
- */
-
- if (memcg != page->mem_cgroup) {
- if (memcg) {
- uncharge_batch(memcg, pgpgout, nr_anon, nr_file,
- nr_kmem, nr_huge, nr_shmem, page);
- pgpgout = nr_anon = nr_file = nr_kmem = 0;
- nr_huge = nr_shmem = 0;
- }
- memcg = page->mem_cgroup;
- }
-
- if (!PageKmemcg(page)) {
- unsigned int nr_pages = 1;
-
- if (PageTransHuge(page)) {
- nr_pages <<= compound_order(page);
- nr_huge += nr_pages;
- }
- if (PageAnon(page))
- nr_anon += nr_pages;
- else {
- nr_file += nr_pages;
- if (PageSwapBacked(page))
- nr_shmem += nr_pages;
- }
- pgpgout++;
- } else {
- nr_kmem += 1 << compound_order(page);
- __ClearPageKmemcg(page);
- }
-
- page->mem_cgroup = NULL;
+ uncharge_page(page, &ug);
} while (next != page_list);
- if (memcg)
- uncharge_batch(memcg, pgpgout, nr_anon, nr_file,
- nr_kmem, nr_huge, nr_shmem, page);
+ if (ug.memcg)
+ uncharge_batch(&ug);
}
/**
*/
void mem_cgroup_uncharge(struct page *page)
{
+ struct uncharge_gather ug;
+
if (mem_cgroup_disabled())
return;
if (!page->mem_cgroup)
return;
- INIT_LIST_HEAD(&page->lru);
- uncharge_list(&page->lru);
+ uncharge_gather_clear(&ug);
+ uncharge_page(page, &ug);
+ uncharge_batch(&ug);
}
/**
this_cpu_sub(memcg->stat->count[MEMCG_SOCK], nr_pages);
- page_counter_uncharge(&memcg->memory, nr_pages);
- css_put_many(&memcg->css, nr_pages);
+ refill_stock(memcg, nr_pages);
}
static int __init cgroup_memory(char *s)