/* Activity counter to indicate that a swapon or swapoff has occurred */
static atomic_t proc_poll_event = ATOMIC_INIT(0);
+atomic_t nr_rotate_swap = ATOMIC_INIT(0);
+
static inline unsigned char swap_count(unsigned char ent)
{
return ent & ~SWAP_HAS_CACHE; /* may include SWAP_HAS_CONT flag */
info->data = 0;
}
+static inline bool cluster_is_huge(struct swap_cluster_info *info)
+{
+ return info->flags & CLUSTER_FLAG_HUGE;
+}
+
+static inline void cluster_clear_huge(struct swap_cluster_info *info)
+{
+ info->flags &= ~CLUSTER_FLAG_HUGE;
+}
+
static inline struct swap_cluster_info *lock_cluster(struct swap_info_struct *si,
unsigned long offset)
{
offset = idx * SWAPFILE_CLUSTER;
ci = lock_cluster(si, offset);
alloc_cluster(si, idx);
- cluster_set_count_flag(ci, SWAPFILE_CLUSTER, 0);
+ cluster_set_count_flag(ci, SWAPFILE_CLUSTER, CLUSTER_FLAG_HUGE);
map = si->swap_map + offset;
for (i = 0; i < SWAPFILE_CLUSTER; i++)
spin_unlock(&si->lock);
goto nextsi;
}
- if (cluster)
- n_ret = swap_alloc_cluster(si, swp_entries);
- else
+ if (cluster) {
+ if (!(si->flags & SWP_FILE))
+ n_ret = swap_alloc_cluster(si, swp_entries);
+ } else
n_ret = scan_swap_map_slots(si, SWAP_HAS_CACHE,
n_goal, swp_entries);
spin_unlock(&si->lock);
struct swap_cluster_info *ci;
struct swap_info_struct *si;
unsigned char *map;
- unsigned int i;
+ unsigned int i, free_entries = 0;
+ unsigned char val;
- si = swap_info_get(entry);
+ si = _swap_info_get(entry);
if (!si)
return;
ci = lock_cluster(si, offset);
+ VM_BUG_ON(!cluster_is_huge(ci));
map = si->swap_map + offset;
for (i = 0; i < SWAPFILE_CLUSTER; i++) {
- VM_BUG_ON(map[i] != SWAP_HAS_CACHE);
- map[i] = 0;
+ val = map[i];
+ VM_BUG_ON(!(val & SWAP_HAS_CACHE));
+ if (val == SWAP_HAS_CACHE)
+ free_entries++;
}
+ if (!free_entries) {
+ for (i = 0; i < SWAPFILE_CLUSTER; i++)
+ map[i] &= ~SWAP_HAS_CACHE;
+ }
+ cluster_clear_huge(ci);
unlock_cluster(ci);
- mem_cgroup_uncharge_swap(entry, SWAPFILE_CLUSTER);
- swap_free_cluster(si, idx);
- spin_unlock(&si->lock);
+ if (free_entries == SWAPFILE_CLUSTER) {
+ spin_lock(&si->lock);
+ ci = lock_cluster(si, offset);
+ memset(map, 0, SWAPFILE_CLUSTER);
+ unlock_cluster(ci);
+ mem_cgroup_uncharge_swap(entry, SWAPFILE_CLUSTER);
+ swap_free_cluster(si, idx);
+ spin_unlock(&si->lock);
+ } else if (free_entries) {
+ for (i = 0; i < SWAPFILE_CLUSTER; i++, entry.val++) {
+ if (!__swap_entry_free(si, entry, SWAP_HAS_CACHE))
+ free_swap_slot(entry);
+ }
+ }
+}
+
+int split_swap_cluster(swp_entry_t entry)
+{
+ struct swap_info_struct *si;
+ struct swap_cluster_info *ci;
+ unsigned long offset = swp_offset(entry);
+
+ si = _swap_info_get(entry);
+ if (!si)
+ return -EBUSY;
+ ci = lock_cluster(si, offset);
+ cluster_clear_huge(ci);
+ unlock_cluster(ci);
+ return 0;
}
#else
static inline void swapcache_free_cluster(swp_entry_t entry)
return count;
}
+#ifdef CONFIG_THP_SWAP
+static bool swap_page_trans_huge_swapped(struct swap_info_struct *si,
+ swp_entry_t entry)
+{
+ struct swap_cluster_info *ci;
+ unsigned char *map = si->swap_map;
+ unsigned long roffset = swp_offset(entry);
+ unsigned long offset = round_down(roffset, SWAPFILE_CLUSTER);
+ int i;
+ bool ret = false;
+
+ ci = lock_cluster_or_swap_info(si, offset);
+ if (!ci || !cluster_is_huge(ci)) {
+ if (map[roffset] != SWAP_HAS_CACHE)
+ ret = true;
+ goto unlock_out;
+ }
+ for (i = 0; i < SWAPFILE_CLUSTER; i++) {
+ if (map[offset + i] != SWAP_HAS_CACHE) {
+ ret = true;
+ break;
+ }
+ }
+unlock_out:
+ unlock_cluster_or_swap_info(si, ci);
+ return ret;
+}
+
+static bool page_swapped(struct page *page)
+{
+ swp_entry_t entry;
+ struct swap_info_struct *si;
+
+ if (likely(!PageTransCompound(page)))
+ return page_swapcount(page) != 0;
+
+ page = compound_head(page);
+ entry.val = page_private(page);
+ si = _swap_info_get(entry);
+ if (si)
+ return swap_page_trans_huge_swapped(si, entry);
+ return false;
+}
+
+static int page_trans_huge_map_swapcount(struct page *page, int *total_mapcount,
+ int *total_swapcount)
+{
+ int i, map_swapcount, _total_mapcount, _total_swapcount;
+ unsigned long offset = 0;
+ struct swap_info_struct *si;
+ struct swap_cluster_info *ci = NULL;
+ unsigned char *map = NULL;
+ int mapcount, swapcount = 0;
+
+ /* hugetlbfs shouldn't call it */
+ VM_BUG_ON_PAGE(PageHuge(page), page);
+
+ if (likely(!PageTransCompound(page))) {
+ mapcount = atomic_read(&page->_mapcount) + 1;
+ if (total_mapcount)
+ *total_mapcount = mapcount;
+ if (PageSwapCache(page))
+ swapcount = page_swapcount(page);
+ if (total_swapcount)
+ *total_swapcount = swapcount;
+ return mapcount + swapcount;
+ }
+
+ page = compound_head(page);
+
+ _total_mapcount = _total_swapcount = map_swapcount = 0;
+ if (PageSwapCache(page)) {
+ swp_entry_t entry;
+
+ entry.val = page_private(page);
+ si = _swap_info_get(entry);
+ if (si) {
+ map = si->swap_map;
+ offset = swp_offset(entry);
+ }
+ }
+ if (map)
+ ci = lock_cluster(si, offset);
+ for (i = 0; i < HPAGE_PMD_NR; i++) {
+ mapcount = atomic_read(&page[i]._mapcount) + 1;
+ _total_mapcount += mapcount;
+ if (map) {
+ swapcount = swap_count(map[offset + i]);
+ _total_swapcount += swapcount;
+ }
+ map_swapcount = max(map_swapcount, mapcount + swapcount);
+ }
+ unlock_cluster(ci);
+ if (PageDoubleMap(page)) {
+ map_swapcount -= 1;
+ _total_mapcount -= HPAGE_PMD_NR;
+ }
+ mapcount = compound_mapcount(page);
+ map_swapcount += mapcount;
+ _total_mapcount += mapcount;
+ if (total_mapcount)
+ *total_mapcount = _total_mapcount;
+ if (total_swapcount)
+ *total_swapcount = _total_swapcount;
+
+ return map_swapcount;
+}
+#else
+#define swap_page_trans_huge_swapped(si, entry) swap_swapcount(si, entry)
+#define page_swapped(page) (page_swapcount(page) != 0)
+
+static int page_trans_huge_map_swapcount(struct page *page, int *total_mapcount,
+ int *total_swapcount)
+{
+ int mapcount, swapcount = 0;
+
+ /* hugetlbfs shouldn't call it */
+ VM_BUG_ON_PAGE(PageHuge(page), page);
+
+ mapcount = page_trans_huge_mapcount(page, total_mapcount);
+ if (PageSwapCache(page))
+ swapcount = page_swapcount(page);
+ if (total_swapcount)
+ *total_swapcount = swapcount;
+ return mapcount + swapcount;
+}
+#endif
+
/*
* We can write to an anon page without COW if there are no other references
* to it. And as a side-effect, free up its swap: because the old content
* on disk will never be read, and seeking back there to write new content
* later would only waste time away from clustering.
*
- * NOTE: total_mapcount should not be relied upon by the caller if
+ * NOTE: total_map_swapcount should not be relied upon by the caller if
* reuse_swap_page() returns false, but it may be always overwritten
* (see the other implementation for CONFIG_SWAP=n).
*/
-bool reuse_swap_page(struct page *page, int *total_mapcount)
+bool reuse_swap_page(struct page *page, int *total_map_swapcount)
{
- int count;
+ int count, total_mapcount, total_swapcount;
VM_BUG_ON_PAGE(!PageLocked(page), page);
if (unlikely(PageKsm(page)))
return false;
- count = page_trans_huge_mapcount(page, total_mapcount);
- if (count <= 1 && PageSwapCache(page)) {
- count += page_swapcount(page);
- if (count != 1)
- goto out;
+ count = page_trans_huge_map_swapcount(page, &total_mapcount,
+ &total_swapcount);
+ if (total_map_swapcount)
+ *total_map_swapcount = total_mapcount + total_swapcount;
+ if (count == 1 && PageSwapCache(page) &&
+ (likely(!PageTransCompound(page)) ||
+ /* The remaining swap count will be freed soon */
+ total_swapcount == page_swapcount(page))) {
if (!PageWriteback(page)) {
+ page = compound_head(page);
delete_from_swap_cache(page);
SetPageDirty(page);
} else {
spin_unlock(&p->lock);
}
}
-out:
+
return count <= 1;
}
return 0;
if (PageWriteback(page))
return 0;
- if (page_swapcount(page))
+ if (page_swapped(page))
return 0;
/*
if (pm_suspended_storage())
return 0;
+ page = compound_head(page);
delete_from_swap_cache(page);
SetPageDirty(page);
return 1;
p = _swap_info_get(entry);
if (p) {
count = __swap_entry_free(p, entry, 1);
- if (count == SWAP_HAS_CACHE) {
+ if (count == SWAP_HAS_CACHE &&
+ !swap_page_trans_huge_swapped(p, entry)) {
page = find_get_page(swap_address_space(entry),
swp_offset(entry));
if (page && !trylock_page(page)) {
*/
if (PageSwapCache(page) && !PageWriteback(page) &&
(!page_mapped(page) || mem_cgroup_swap_full(page)) &&
- !swap_swapcount(p, entry)) {
+ !swap_page_trans_huge_swapped(p, entry)) {
+ page = compound_head(page);
delete_from_swap_cache(page);
SetPageDirty(page);
}
.sync_mode = WB_SYNC_NONE,
};
- swap_writepage(page, &wbc);
+ swap_writepage(compound_head(page), &wbc);
lock_page(page);
wait_on_page_writeback(page);
}
* delete, since it may not have been written out to swap yet.
*/
if (PageSwapCache(page) &&
- likely(page_private(page) == entry.val))
- delete_from_swap_cache(page);
+ likely(page_private(page) == entry.val) &&
+ !page_swapped(page))
+ delete_from_swap_cache(compound_head(page));
/*
* So we could skip searching mms once swap count went
if (p->flags & SWP_CONTINUED)
free_swap_count_continuations(p);
+ if (!p->bdev || !blk_queue_nonrot(bdev_get_queue(p->bdev)))
+ atomic_dec(&nr_rotate_swap);
+
mutex_lock(&swapon_mutex);
spin_lock(&swap_lock);
spin_lock(&p->lock);
cluster = per_cpu_ptr(p->percpu_cluster, cpu);
cluster_set_null(&cluster->index);
}
- }
+ } else
+ atomic_inc(&nr_rotate_swap);
error = swap_cgroup_swapon(p->type, maxpages);
if (error)