EXPORT_PER_CPU_SYMBOL(numa_node);
#endif
+DEFINE_STATIC_KEY_TRUE(vm_numa_stat_key);
+
#ifdef CONFIG_HAVE_MEMORYLESS_NODES
/*
* N.B., Do NOT reference the '_numa_mem_' per cpu variable directly.
int page_group_by_mobility_disabled __read_mostly;
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+
+/*
+ * Determine how many pages need to be initialized durig early boot
+ * (non-deferred initialization).
+ * The value of first_deferred_pfn will be set later, once non-deferred pages
+ * are initialized, but for now set it ULONG_MAX.
+ */
static inline void reset_deferred_meminit(pg_data_t *pgdat)
{
- unsigned long max_initialise;
- unsigned long reserved_lowmem;
+ phys_addr_t start_addr, end_addr;
+ unsigned long max_pgcnt;
+ unsigned long reserved;
/*
* Initialise at least 2G of a node but also take into account that
* two large system hashes that can take up 1GB for 0.25TB/node.
*/
- max_initialise = max(2UL << (30 - PAGE_SHIFT),
- (pgdat->node_spanned_pages >> 8));
+ max_pgcnt = max(2UL << (30 - PAGE_SHIFT),
+ (pgdat->node_spanned_pages >> 8));
/*
* Compensate the all the memblock reservations (e.g. crash kernel)
* from the initial estimation to make sure we will initialize enough
* memory to boot.
*/
- reserved_lowmem = memblock_reserved_memory_within(pgdat->node_start_pfn,
- pgdat->node_start_pfn + max_initialise);
- max_initialise += reserved_lowmem;
+ start_addr = PFN_PHYS(pgdat->node_start_pfn);
+ end_addr = PFN_PHYS(pgdat->node_start_pfn + max_pgcnt);
+ reserved = memblock_reserved_memory_within(start_addr, end_addr);
+ max_pgcnt += PHYS_PFN(reserved);
- pgdat->static_init_size = min(max_initialise, pgdat->node_spanned_pages);
+ pgdat->static_init_pgcnt = min(max_pgcnt, pgdat->node_spanned_pages);
pgdat->first_deferred_pfn = ULONG_MAX;
}
if (zone_end < pgdat_end_pfn(pgdat))
return true;
(*nr_initialised)++;
- if ((*nr_initialised > pgdat->static_init_size) &&
+ if ((*nr_initialised > pgdat->static_init_pgcnt) &&
(pfn & (PAGES_PER_SECTION - 1)) == 0) {
pgdat->first_deferred_pfn = pfn;
return false;
static void __meminit __init_single_page(struct page *page, unsigned long pfn,
unsigned long zone, int nid)
{
+ mm_zero_struct_page(page);
set_page_links(page, zone, nid, pfn);
init_page_count(page);
page_mapcount_reset(page);
* Go through the free lists for the given migratetype and remove
* the smallest available page from the freelists
*/
-static inline
+static __always_inline
struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
int migratetype)
{
};
#ifdef CONFIG_CMA
-static struct page *__rmqueue_cma_fallback(struct zone *zone,
+static __always_inline struct page *__rmqueue_cma_fallback(struct zone *zone,
unsigned int order)
{
return __rmqueue_smallest(zone, order, MIGRATE_CMA);
* deviation from the rest of this file, to make the for loop
* condition simpler.
*/
-static inline bool
+static __always_inline bool
__rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
{
struct free_area *area;
* Do the hard work of removing an element from the buddy allocator.
* Call me with the zone->lock already held.
*/
-static struct page *__rmqueue(struct zone *zone, unsigned int order,
- int migratetype)
+static __always_inline struct page *
+__rmqueue(struct zone *zone, unsigned int order, int migratetype)
{
struct page *page;
*/
static int rmqueue_bulk(struct zone *zone, unsigned int order,
unsigned long count, struct list_head *list,
- int migratetype, bool cold)
+ int migratetype)
{
int i, alloced = 0;
continue;
/*
- * Split buddy pages returned by expand() are received here
- * in physical page order. The page is added to the callers and
- * list and the list head then moves forward. From the callers
- * perspective, the linked list is ordered by page number in
- * some conditions. This is useful for IO devices that can
- * merge IO requests if the physical pages are ordered
- * properly.
+ * Split buddy pages returned by expand() are received here in
+ * physical page order. The page is added to the tail of
+ * caller's list. From the callers perspective, the linked list
+ * is ordered by page number under some conditions. This is
+ * useful for IO devices that can forward direction from the
+ * head, thus also in the physical page order. This is useful
+ * for IO devices that can merge IO requests if the physical
+ * pages are ordered properly.
*/
- if (likely(!cold))
- list_add(&page->lru, list);
- else
- list_add_tail(&page->lru, list);
- list = &page->lru;
+ list_add_tail(&page->lru, list);
alloced++;
if (is_migrate_cma(get_pcppage_migratetype(page)))
__mod_zone_page_state(zone, NR_FREE_CMA_PAGES,
}
#endif /* CONFIG_PM */
-/*
- * Free a 0-order page
- * cold == true ? free a cold page : free a hot page
- */
-void free_hot_cold_page(struct page *page, bool cold)
+static bool free_unref_page_prepare(struct page *page, unsigned long pfn)
{
- struct zone *zone = page_zone(page);
- struct per_cpu_pages *pcp;
- unsigned long flags;
- unsigned long pfn = page_to_pfn(page);
int migratetype;
if (!free_pcp_prepare(page))
- return;
+ return false;
migratetype = get_pfnblock_migratetype(page, pfn);
set_pcppage_migratetype(page, migratetype);
- local_irq_save(flags);
+ return true;
+}
+
+static void free_unref_page_commit(struct page *page, unsigned long pfn)
+{
+ struct zone *zone = page_zone(page);
+ struct per_cpu_pages *pcp;
+ int migratetype;
+
+ migratetype = get_pcppage_migratetype(page);
__count_vm_event(PGFREE);
/*
if (migratetype >= MIGRATE_PCPTYPES) {
if (unlikely(is_migrate_isolate(migratetype))) {
free_one_page(zone, page, pfn, 0, migratetype);
- goto out;
+ return;
}
migratetype = MIGRATE_MOVABLE;
}
pcp = &this_cpu_ptr(zone->pageset)->pcp;
- if (!cold)
- list_add(&page->lru, &pcp->lists[migratetype]);
- else
- list_add_tail(&page->lru, &pcp->lists[migratetype]);
+ list_add(&page->lru, &pcp->lists[migratetype]);
pcp->count++;
if (pcp->count >= pcp->high) {
unsigned long batch = READ_ONCE(pcp->batch);
free_pcppages_bulk(zone, batch, pcp);
pcp->count -= batch;
}
+}
-out:
+/*
+ * Free a 0-order page
+ */
+void free_unref_page(struct page *page)
+{
+ unsigned long flags;
+ unsigned long pfn = page_to_pfn(page);
+
+ if (!free_unref_page_prepare(page, pfn))
+ return;
+
+ local_irq_save(flags);
+ free_unref_page_commit(page, pfn);
local_irq_restore(flags);
}
/*
* Free a list of 0-order pages
*/
-void free_hot_cold_page_list(struct list_head *list, bool cold)
+void free_unref_page_list(struct list_head *list)
{
struct page *page, *next;
+ unsigned long flags, pfn;
+ /* Prepare pages for freeing */
list_for_each_entry_safe(page, next, list, lru) {
- trace_mm_page_free_batched(page, cold);
- free_hot_cold_page(page, cold);
+ pfn = page_to_pfn(page);
+ if (!free_unref_page_prepare(page, pfn))
+ list_del(&page->lru);
+ set_page_private(page, pfn);
}
+
+ local_irq_save(flags);
+ list_for_each_entry_safe(page, next, list, lru) {
+ unsigned long pfn = page_private(page);
+
+ set_page_private(page, 0);
+ trace_mm_page_free_batched(page);
+ free_unref_page_commit(page, pfn);
+ }
+ local_irq_restore(flags);
}
/*
#ifdef CONFIG_NUMA
enum numa_stat_item local_stat = NUMA_LOCAL;
+ /* skip numa counters update if numa stats is disabled */
+ if (!static_branch_likely(&vm_numa_stat_key))
+ return;
+
if (z->node != numa_node_id())
local_stat = NUMA_OTHER;
/* Remove page from the per-cpu list, caller must protect the list */
static struct page *__rmqueue_pcplist(struct zone *zone, int migratetype,
- bool cold, struct per_cpu_pages *pcp,
+ struct per_cpu_pages *pcp,
struct list_head *list)
{
struct page *page;
if (list_empty(list)) {
pcp->count += rmqueue_bulk(zone, 0,
pcp->batch, list,
- migratetype, cold);
+ migratetype);
if (unlikely(list_empty(list)))
return NULL;
}
- if (cold)
- page = list_last_entry(list, struct page, lru);
- else
- page = list_first_entry(list, struct page, lru);
-
+ page = list_first_entry(list, struct page, lru);
list_del(&page->lru);
pcp->count--;
} while (check_new_pcp(page));
{
struct per_cpu_pages *pcp;
struct list_head *list;
- bool cold = ((gfp_flags & __GFP_COLD) != 0);
struct page *page;
unsigned long flags;
local_irq_save(flags);
pcp = &this_cpu_ptr(zone->pageset)->pcp;
list = &pcp->lists[migratetype];
- page = __rmqueue_pcplist(zone, migratetype, cold, pcp, list);
+ page = __rmqueue_pcplist(zone, migratetype, pcp, list);
if (page) {
__count_zid_vm_events(PGALLOC, page_zonenum(page), 1 << order);
zone_statistics(preferred_zone, zone);
if (!area->nr_free)
continue;
- if (alloc_harder)
- return true;
-
for (mt = 0; mt < MIGRATE_PCPTYPES; mt++) {
if (!list_empty(&area->free_list[mt]))
return true;
return true;
}
#endif
+ if (alloc_harder &&
+ !list_empty(&area->free_list[MIGRATE_HIGHATOMIC]))
+ return true;
}
return false;
}
if ((gfp_mask & __GFP_NOWARN) || !__ratelimit(&nopage_rs))
return;
- pr_warn("%s: ", current->comm);
-
va_start(args, fmt);
vaf.fmt = fmt;
vaf.va = &args;
- pr_cont("%pV", &vaf);
+ pr_warn("%s: %pV, mode:%#x(%pGg), nodemask=%*pbl\n",
+ current->comm, &vaf, gfp_mask, &gfp_mask,
+ nodemask_pr_args(nodemask));
va_end(args);
- pr_cont(", mode:%#x(%pGg), nodemask=", gfp_mask, &gfp_mask);
- if (nodemask)
- pr_cont("%*pbl\n", nodemask_pr_args(nodemask));
- else
- pr_cont("(null)\n");
-
cpuset_print_current_mems_allowed();
dump_stack();
enum compact_result compact_result;
int compaction_retries;
int no_progress_loops;
- unsigned long alloc_start = jiffies;
- unsigned int stall_timeout = 10 * HZ;
unsigned int cpuset_mems_cookie;
int reserve_flags;
if (!can_direct_reclaim)
goto nopage;
- /* Make sure we know about allocations which stall for too long */
- if (time_after(jiffies, alloc_start + stall_timeout)) {
- warn_alloc(gfp_mask & ~__GFP_NOWARN, ac->nodemask,
- "page allocation stalls for %ums, order:%u",
- jiffies_to_msecs(jiffies-alloc_start), order);
- stall_timeout += 10 * HZ;
- }
-
/* Avoid recursion of direct reclaim */
if (current->flags & PF_MEMALLOC)
goto nopage;
{
if (put_page_testzero(page)) {
if (order == 0)
- free_hot_cold_page(page, false);
+ free_unref_page(page);
else
__free_pages_ok(page, order);
}
unsigned int order = compound_order(page);
if (order == 0)
- free_hot_cold_page(page, false);
+ free_unref_page(page);
else
__free_pages_ok(page, order);
}