  a mmap based memory allocator

  Copyright (C) Andrew Tridgell 2007

  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 2 of the License, or (at your option) any later version.

  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  The basic aims of this allocator are:

    - fast for small allocations
    - all memory comes from anonymous mmap
    - releases memory back to the OS as much as possible
    - no per-block prefix (so extremely low overhead)
    - very little paranoid checking in the code
    - thread safety optional at compile time
  Allocations are either 'small', 'large' or 'memalign'.

  'large' allocations do a plain mmap, one per allocation. When
  you free you munmap. The 'large' allocations have a struct
  large_header prefix, so the returned pointer is not in fact on a
  page boundary.

  'small' allocations come from a bucket allocator. There are
  a set of buckets, each larger than the previous by a constant
  factor. 'small' allocations do not have any prefix header before
  the returned pointer.

  The meta-data for the small allocations comes at the start of
  the page. This meta-data contains a bitmap of which pieces of the
  page are currently used.

  'memalign' allocations are for handling memalign()
  requests. These are horribly inefficient, using a singly linked
  list. Best to avoid this function.
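/*
  A caller's-eye sketch of the three allocation classes described above.
  The concrete sizes are only assumptions based on the default tuning below
  (BASE_BUCKET 16, buckets growing to a little under a page); the block is
  illustrative and is not compiled as part of this file.
*/
#if 0
#include <stdlib.h>
#include <string.h>
#include <malloc.h>

int main(void)
{
        char *a = malloc(100);         /* 'small': comes from a bucket, no per-block header */
        char *b = malloc(1024*1024);   /* 'large': bigger than any bucket, gets its own mmap */
        char *c = memalign(64, 1000);  /* 'memalign': tracked on the slow singly linked list */

        memset(a, 1, 100);
        memset(b, 2, 1024*1024);
        memset(c, 3, 1000);

        free(a);
        free(b);
        free(c);
        return 0;
}
#endif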
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)

#define MAX_BUCKETS 15
#define BASE_BUCKET 16
#define FREE_PAGE_LIMIT 2
#define ALLOC_ALIGNMENT 16
#define BUCKET_NEXT_SIZE(prev) ((3*(prev))/2)

#define BUCKET_LOOKUP_SIZE 128
#define ALLOC_PARANOIA 0
/* you can hard code the page size in the Makefile for more speed */
#define PAGE_SIZE state.page_size

#define MIN(a,b) ((a)<(b)?(a):(b))

#define ALIGN_SIZE(size) (((size)+(ALLOC_ALIGNMENT-1))&~(ALLOC_ALIGNMENT-1))
/* work out how many bytes aligning to ALLOC_ALIGNMENT plus at least
   sizeof(type) will cost */
#define ALIGN_COST(type) ALIGN_SIZE(sizeof(type))
/* take a pointer, and move it up by at least one sizeof(type), aligned
   to the ALLOC_ALIGNMENT */
#define ALIGN_UP(ptr, type) ((ALIGN_COST(type)+(intptr_t)(ptr)))

/* align to the start of the page, and return a type* */
#define ALIGN_DOWN_PAGE(ptr, type) (type *)((~(intptr_t)(PAGE_SIZE-1)) & (intptr_t)(ptr))

/* work out the aligned size of a page header, given the size of the used[] array */
#define PAGE_HEADER_SIZE(n) ALIGN_SIZE(offsetof(struct page_header, used) + 4*(((n)+31)/32))

/* see if a pointer is page aligned. This detects large memalign() pointers */
#define IS_PAGE_ALIGNED(ptr) (((PAGE_SIZE-1) & (intptr_t)ptr) == 0)
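/*
  Worked examples of the macros above, assuming ALLOC_ALIGNMENT == 16 and a
  4096 byte page (figures are illustrative only):

    ALIGN_SIZE(24)                         -> 32 (round up to a multiple of 16)
    ALIGN_COST(struct large_header)        -> sizeof(struct large_header) rounded up to 16
    ALIGN_UP(p, struct large_header)       -> p + that cost
    ALIGN_DOWN_PAGE(0x12345678, struct page_header)
                                           -> (struct page_header *)0x12345000
    IS_PAGE_ALIGNED((void *)0x12346000)    -> true
*/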
/* move an element from the front of list1 to the front of list2 */
#define MOVE_LIST_FRONT(list1, list2, p) \
        if (list1) list1->prev = NULL; \
        if (p->next) p->next->prev = p; \

/* move an element from anywhere in list1 to the front of list2 */
#define MOVE_LIST(list1, list2, p) \
        MOVE_LIST_FRONT(list1, list2, p); \
        p->prev->next = p->next; \
        if (p->next) p->next->prev = p->prev; \
        if (p->next) (p)->next->prev = (p); \
        uint32_t num_pages; /* maps to num_pages in struct large_header */
        uint16_t elements_used;
        uint16_t num_elements;

        struct page_header *next, *prev;
        struct bucket_state *bucket;

struct large_header {

struct memalign_header {
        struct memalign_header *next;

struct bucket_state {
        uint32_t alloc_limit;
        uint16_t elements_per_page;

        pthread_mutex_t mutex;

        struct page_header *page_list;
        struct page_header *full_list;

        uint32_t largest_bucket;

        /* this is a lookup table that maps size/ALLOC_ALIGNMENT to the right bucket */
        uint8_t bucket_lookup[BUCKET_LOOKUP_SIZE];

        /* the rest of the structure may be written to after initialisation,
           so all accesses need to hold the mutex */
        pthread_mutex_t mutex;

        uint32_t num_empty_pages;
        struct page_header *empty_list;
        struct memalign_header *memalign_list;
        struct bucket_state buckets[MAX_BUCKETS];

/* static state of the allocator */
static struct alloc_state state;

static struct alloc_state state = {
        .mutex = PTHREAD_MUTEX_INITIALIZER
#define THREAD_LOCK(mutexp) do { if (pthread_mutex_lock(mutexp) != 0) abort(); } while (0)
#define THREAD_UNLOCK(mutexp) do { if (pthread_mutex_unlock(mutexp) != 0) abort(); } while (0)

#define THREAD_LOCK(mutexp)
#define THREAD_UNLOCK(mutexp)
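/*
  Note on the do { ... } while (0) wrapper above: it makes the macro behave
  as a single statement, so a hypothetical caller such as

      if (need_lock)
              THREAD_LOCK(&state.mutex);
      else
              do_something_else();

  still parses as intended. A trailing semicolon inside the macro itself
  would leave an extra empty statement and detach the else branch.
*/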
  initialise the allocator
static void alloc_initialise(void)
        THREAD_LOCK(&state.mutex);

        state.page_size = getpagesize();
        if (PAGE_SIZE != state.page_size) {

        for (i=0;i<MAX_BUCKETS;i++) {
                struct bucket_state *bs = &state.buckets[i];

                pthread_mutex_init(&bs->mutex, NULL);

                bs->alloc_limit = BASE_BUCKET;
                n = bs->alloc_limit/ALLOC_ALIGNMENT;
                if (n > BUCKET_LOOKUP_SIZE) {
                        n = BUCKET_LOOKUP_SIZE;
                memset(&state.bucket_lookup[0], i, n);

                struct bucket_state *bs0 = &state.buckets[i-1];
                bs->alloc_limit = ALIGN_SIZE(BUCKET_NEXT_SIZE(bs0->alloc_limit));

                /* squeeze in one more bucket if possible */
                if (bs->alloc_limit > PAGE_SIZE - PAGE_HEADER_SIZE(1)) {
                        bs->alloc_limit = PAGE_SIZE - PAGE_HEADER_SIZE(1);
                        if (bs->alloc_limit == bs0->alloc_limit) break;

                n = (bs->alloc_limit - bs0->alloc_limit)/ALLOC_ALIGNMENT;
                if (n + b > BUCKET_LOOKUP_SIZE) {
                        n = BUCKET_LOOKUP_SIZE - b;
                memset(&state.bucket_lookup[b], i, n);

                /* work out how many elements can be put on each page */
                bs->elements_per_page = (PAGE_SIZE-PAGE_HEADER_SIZE(1))/bs->alloc_limit;
                while (bs->elements_per_page*bs->alloc_limit + PAGE_HEADER_SIZE(bs->elements_per_page) >
                        bs->elements_per_page--;

                if (unlikely(bs->elements_per_page < 1)) {

        state.max_bucket = i-1;
        state.largest_bucket = state.buckets[state.max_bucket].alloc_limit;
        state.initialised = true;

        state.pid = getpid();

        THREAD_UNLOCK(&state.mutex);
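/*
  With the default tuning (BASE_BUCKET 16, BUCKET_NEXT_SIZE n -> 3n/2,
  ALLOC_ALIGNMENT 16) the loop above produces bucket limits of roughly

    16, 32, 48, 80, 128, 192, 288, 432, 656, 992, 1488, 2240, 3360, ...

  with the last bucket clamped to PAGE_SIZE - PAGE_HEADER_SIZE(1), i.e. a
  little under one page. These figures assume a 4096 byte page and are
  illustrative only; the real values are computed here at runtime.
*/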
static void alloc_pid_handler(void)
        state.pid = getpid();
  large allocation function - use mmap per allocation
static void *alloc_large(size_t size)
        struct large_header *lh;

        num_pages = (size+ALIGN_COST(struct large_header)+(PAGE_SIZE-1))/PAGE_SIZE;

        if (unlikely(num_pages < size/PAGE_SIZE)) {

        lh = (struct large_header *)mmap(0, num_pages*PAGE_SIZE, PROT_READ | PROT_WRITE,
                                         MAP_ANON | MAP_PRIVATE, -1, 0);
        if (unlikely(lh == (struct large_header *)-1)) {

        lh->num_pages = num_pages;

        return (void *)ALIGN_UP(lh, struct large_header);

static void alloc_large_free(void *ptr)
        struct large_header *lh = ALIGN_DOWN_PAGE(ptr, struct large_header);
        munmap((void *)lh, lh->num_pages*PAGE_SIZE);
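/*
  Layout sketch for a 'large' allocation (illustrative, assuming
  ALIGN_COST(struct large_header) == 16 and a 4096 byte page):

    mmap() returns   0x7f0000000000   <- struct large_header lives here
    caller gets      0x7f0000000010   <- ALIGN_UP(lh, struct large_header)

  On free, ALIGN_DOWN_PAGE() masks the caller's pointer back to the page
  start to recover the header; this is also why 'large' pointers are never
  page aligned themselves.
*/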
  called with the bucket lock held
static void alloc_refill_bucket(struct bucket_state *bs)
        struct page_header *ph;

        /* see if we can get a page from the global empty list */
        THREAD_LOCK(&state.mutex);
        ph = state.empty_list;
                MOVE_LIST_FRONT(state.empty_list, bs->page_list, ph);
                ph->num_elements = bs->elements_per_page;
                memset(ph->used, 0, (ph->num_elements+7)/8);
                state.num_empty_pages--;
                THREAD_UNLOCK(&state.mutex);

        /* we need to allocate a new page */
        ph = (struct page_header *)mmap(0, PAGE_SIZE, PROT_READ | PROT_WRITE,
                                        MAP_ANON | MAP_PRIVATE, -1, 0);
        if (unlikely(ph == (struct page_header *)-1)) {
                THREAD_UNLOCK(&state.mutex);

        /* we rely on mmap() giving us initially zero memory */
        ph->num_elements = bs->elements_per_page;

        /* link it into the page_list */
        ph->next = bs->page_list;
        if (ph->next) ph->next->prev = ph;

        THREAD_UNLOCK(&state.mutex);
  find the first free element in a bitmap. This assumes there is a bit
  to be found! Any return >= num_bits means an error (no free bit found)
static inline unsigned alloc_find_free_bit(const uint32_t *bitmap, uint32_t num_bits)
        const uint32_t n = (num_bits+31)/32;

                if (bitmap[i] != 0xFFFFFFFF) {
                        return (i*32) + ffs(~bitmap[i]) - 1;
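/*
  Example of the bit search above: with bitmap[0] == 0x0000000f (elements
  0..3 in use), ~bitmap[0] == 0xfffffff0, ffs() returns 5 (the 1-based
  position of the lowest set bit), so the function returns element 4.
*/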
static void *alloc_malloc(size_t size)
        struct bucket_state *bs;
        struct page_header *ph;

        if (unlikely(state.initialised == false)) {

        if (unlikely(state.pid != getpid())) {

        /* is it a large allocation? */
        if (unlikely(size > state.largest_bucket)) {
                return alloc_large(size);

        /* find the right bucket */
        if (likely(size / ALLOC_ALIGNMENT < BUCKET_LOOKUP_SIZE)) {
                i = state.bucket_lookup[(size-1) / ALLOC_ALIGNMENT];
                for (i=state.bucket_lookup[BUCKET_LOOKUP_SIZE-1];i<=state.max_bucket;i++) {
                        if (state.buckets[i].alloc_limit >= size) {

        bs = &state.buckets[i];

        THREAD_LOCK(&bs->mutex);

        if (size > bs->alloc_limit) {

        /* it might be empty. If so, refill it */
        if (unlikely(bs->page_list == NULL)) {
                alloc_refill_bucket(bs);
                if (unlikely(bs->page_list == NULL)) {
                        THREAD_UNLOCK(&bs->mutex);

        if (unlikely(bs->page_list->pid != getpid() &&
                     bs->page_list->elements_used >= bs->page_list->num_elements/2)) {
                alloc_refill_bucket(bs);

        /* take the first one */
        i = alloc_find_free_bit(ph->used, ph->num_elements);
        if (unlikely(i >= ph->num_elements)) {

        /* mark this element as used */
        ph->used[i/32] |= 1<<(i%32);

        /* check if this page is now full */
        if (unlikely(ph->elements_used == ph->num_elements)) {
                MOVE_LIST_FRONT(bs->page_list, bs->full_list, ph);

        THREAD_UNLOCK(&bs->mutex);

        /* return the element within the page */
        return (void *)((i*bs->alloc_limit)+PAGE_HEADER_SIZE(ph->num_elements)+(intptr_t)ph);
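/*
  Address arithmetic sketch with illustrative figures: for a page header at
  0x7f0000001000, PAGE_HEADER_SIZE(n) == 48 (an assumption; it depends on
  the struct layout) and alloc_limit == 32, element i == 3 is returned as

    0x7f0000001000 + 48 + 3*32 == 0x7f0000001090

  and alloc_free() below recovers the index with the inverse calculation
  ((ptr - ph) - PAGE_HEADER_SIZE(n)) / alloc_limit == (144 - 48)/32 == 3.
*/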
  release all completely free pages
  called with state.mutex held
static void alloc_release(void)
        struct page_header *ph, *next;

        /* release all the empty pages */
        for (ph=state.empty_list;ph;ph=next) {
                munmap((void *)ph, PAGE_SIZE);

        state.empty_list = NULL;
        state.num_empty_pages = 0;
  free some memory from an alloc_memalign() allocation
static void alloc_memalign_free(void *ptr)
        struct memalign_header *mh;

        THREAD_LOCK(&state.mutex);

        if (state.memalign_list == NULL) {

        /* see if it's the top one */
        mh = state.memalign_list;
                state.memalign_list = mh->next;
                munmap(ptr, mh->num_pages*PAGE_SIZE);
                THREAD_UNLOCK(&state.mutex);

        /* otherwise walk it */
                struct memalign_header *mh2 = mh->next;
                mh->next = mh2->next;
                munmap(ptr, mh2->num_pages*PAGE_SIZE);
                THREAD_UNLOCK(&state.mutex);

        THREAD_UNLOCK(&state.mutex);

        /* it wasn't there ... */
  realloc for memalign blocks
static void *alloc_memalign_realloc(void *ptr, size_t size)
        struct memalign_header *mh;

        THREAD_LOCK(&state.mutex);
        for (mh=state.memalign_list;mh;mh=mh->next) {
                if (mh->p == ptr) break;

        THREAD_UNLOCK(&state.mutex);

        memcpy(ptr2, ptr, MIN(size, mh->num_pages*PAGE_SIZE));
static void alloc_free(void *ptr)
        struct page_header *ph = ALIGN_DOWN_PAGE(ptr, struct page_header);
        struct bucket_state *bs;

        if (unlikely(ptr == NULL)) {

        if (unlikely(IS_PAGE_ALIGNED(ptr))) {
                alloc_memalign_free(ptr);

        if (ph->num_pages != 0) {
                alloc_large_free(ptr);

        THREAD_LOCK(&bs->mutex);

        /* mark the block as being free in the page bitmap */
        i = ((((intptr_t)ptr) - (intptr_t)ph) - PAGE_HEADER_SIZE(ph->num_elements)) /
        if ((ph->used[i/32] & (1<<(i%32))) == 0) {

        ph->used[i/32] &= ~(1<<(i%32));
        /* if all blocks in this page were previously used, then move
           the page from the full list to the page list */
        if (unlikely(ph->elements_used == ph->num_elements)) {
                MOVE_LIST(bs->full_list, bs->page_list, ph);

        /* if all blocks in the page are now free, then it's an empty
           page, and can be added to the global pool of empty
           pages. When that gets too large, release them completely */
        if (unlikely(ph->elements_used == 0)) {
                THREAD_LOCK(&state.mutex);
                MOVE_LIST(bs->page_list, state.empty_list, ph);
                state.num_empty_pages++;
                if (state.num_empty_pages == FREE_PAGE_LIMIT) {
                THREAD_UNLOCK(&state.mutex);

        THREAD_UNLOCK(&bs->mutex);
  realloc for large blocks
static void *alloc_realloc_large(void *ptr, size_t size)
        struct large_header *lh = ALIGN_DOWN_PAGE(ptr, struct large_header);

        /* check for wrap around */
        if (size + ALIGN_COST(struct large_header) < size) {

        /* if it fits in the current pointer, keep it there */
        if (lh->num_pages*PAGE_SIZE >= (size + ALIGN_COST(struct large_header))) {

        ptr2 = alloc_malloc(size);

               MIN(size, (lh->num_pages*PAGE_SIZE)-ALIGN_COST(struct large_header)));
        alloc_large_free(ptr);
static void *alloc_realloc(void *ptr, size_t size)
        struct page_header *ph = ALIGN_DOWN_PAGE(ptr, struct page_header);
        struct bucket_state *bs;

        /* modern realloc() semantics */
        if (unlikely(size == 0)) {

        if (unlikely(ptr == NULL)) {
                return alloc_malloc(size);

        if (unlikely(IS_PAGE_ALIGNED(ptr))) {
                return alloc_memalign_realloc(ptr, size);

        /* if it was a large allocation, it needs special handling */
        if (ph->num_pages != 0) {
                return alloc_realloc_large(ptr, size);

        if (bs->alloc_limit >= size) {

        ptr2 = alloc_malloc(size);
        if (unlikely(ptr2 == NULL)) {

        memcpy(ptr2, ptr, MIN(bs->alloc_limit, size));
static void *alloc_calloc(size_t nmemb, size_t size)
        uint32_t total_size = nmemb*size;

        if (unlikely(size != 0 && total_size / size != nmemb)) {

        if (unlikely(state.pid != getpid())) {

        if (unlikely(total_size > state.largest_bucket)) {
                if (unlikely(state.initialised == false)) {
                return alloc_large(total_size);

        ptr = alloc_malloc(total_size);
        if (likely(ptr != NULL)) {
                memset(ptr, 0, total_size);
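/*
  Worked example of the overflow check above: with nmemb == 0x10000 and
  size == 0x10001 the product wraps a 32 bit total_size to 0x10000, so
  total_size / size == 0 != nmemb and the call fails instead of returning
  an undersized block.
*/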
  memalign - the most difficult of the lot with these data structures

  this is O(n) in the number of aligned allocations. Don't use
  memalign() unless you _really_ need it
static void *alloc_memalign(size_t boundary, size_t size)
        struct memalign_header *mh;
        uint32_t extra_pages=0;

        if (unlikely(state.pid != getpid())) {

        if (unlikely(state.initialised == false)) {

        /* must be power of 2, and no overflow on page alignment */
        if (boundary & (boundary-1)) {
        if (boundary == 0 || (size+PAGE_SIZE-1 < size)) {

        /* try and get lucky */
        if (boundary < state.largest_bucket) {
                void *ptr = malloc(size);
                if (((intptr_t)ptr) % boundary == 0) {

        /* nope, do it the slow way */

        mh = malloc(sizeof(struct memalign_header));
        /* need to overallocate to guarantee alignment larger than
           a page */
        if (boundary > PAGE_SIZE) {
                extra_pages = boundary / PAGE_SIZE;
        /* give them a page aligned allocation */
        mh->num_pages = (size + PAGE_SIZE-1) / PAGE_SIZE;

        if (mh->num_pages == 0) {

        /* check for overflow due to large boundary */
        if (mh->num_pages + extra_pages < mh->num_pages) {

        mh->p = mmap(0, (extra_pages+mh->num_pages)*PAGE_SIZE, PROT_READ | PROT_WRITE,
                     MAP_ANON | MAP_PRIVATE, -1, 0);
        if (mh->p == (void *)-1) {

        while (((intptr_t)mh->p) % boundary) {
                munmap(mh->p, PAGE_SIZE);
                mh->p = (void *)(PAGE_SIZE + (intptr_t)mh->p);

        munmap((void *)(mh->num_pages*PAGE_SIZE + (intptr_t)mh->p),
               extra_pages*PAGE_SIZE);

        THREAD_LOCK(&state.mutex);
        mh->next = state.memalign_list;
        state.memalign_list = mh;
        THREAD_UNLOCK(&state.mutex);
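/*
  Sketch of the over-allocation trick above for boundary > PAGE_SIZE
  (illustrative, assuming a 4096 byte page): memalign(16384, 4096) maps
  num_pages + extra_pages == 1 + 4 pages, munmap()s leading pages one at a
  time until mh->p is 16384-aligned, then releases the unneeded tail, so
  roughly num_pages pages stay mapped at an aligned address.
*/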
void *malloc(size_t size)
        return alloc_malloc(size);

void *calloc(size_t nmemb, size_t size)
        return alloc_calloc(nmemb, size);

void cfree(void *ptr)

void *realloc(void *ptr, size_t size)
        return alloc_realloc(ptr, size);

void *memalign(size_t boundary, size_t size)
        return alloc_memalign(boundary, size);

void *valloc(size_t size)
        return alloc_memalign(getpagesize(), size);

void *pvalloc(size_t size)
        return alloc_memalign(getpagesize(), size);

int posix_memalign(void **memptr, size_t alignment, size_t size)
        *memptr = alloc_memalign(alignment, size);
        if (*memptr == NULL) {
/* try to catch the internal entry points too */
void *__libc_malloc(size_t size) __attribute__((weak, alias("malloc")));
void *__libc_calloc(size_t nmemb, size_t size) __attribute__((weak, alias("calloc")));
void __libc_free(void *ptr) __attribute__((weak, alias("free")));
void __libc_cfree(void *ptr) __attribute__((weak, alias("cfree")));
void *__libc_realloc(void *ptr, size_t size) __attribute__((weak, alias("realloc")));
void *__libc_memalign(size_t boundary, size_t size) __attribute__((weak, alias("memalign")));
void *__libc_valloc(size_t size) __attribute__((weak, alias("valloc")));
void *__libc_pvalloc(size_t size) __attribute__((weak, alias("pvalloc")));
int __posix_memalign(void **memptr, size_t alignment, size_t size) __attribute__((weak, alias("posix_memalign")));
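/*
  One way to exercise the allocator is as a preloaded malloc replacement.
  The file name and flags below are assumptions; adjust them to your build:

    cc -O2 -fPIC -shared alloc_mmap.c -o alloc_mmap.so -lpthread
    LD_PRELOAD=./alloc_mmap.so ./your_program
*/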