2 * Original FreeBSD kern_malloc.c adapted to the specifics of the DDE
3 * uma (slab) implementation.
5 * \author Thomas Friebel <tf13@os.inf.tu-dresden.de>
9 * Copyright (c) 1987, 1991, 1993
10 * The Regents of the University of California. All rights reserved.
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * @(#)kern_malloc.c 8.3 (Berkeley) 1/4/94
39 #include <sys/cdefs.h>
40 __FBSDID("$FreeBSD: src/sys/kern/kern_malloc.c,v 1.135.2.2 2005/01/31 23:26:16 imp Exp $");
44 #include <sys/param.h>
45 #include <sys/systm.h>
47 #include <sys/kernel.h>
49 #include <sys/malloc.h>
51 #include <sys/mutex.h>
52 #include <sys/vmmeter.h>
54 #include <sys/sysctl.h>
59 #include <vm/vm_param.h>
60 #include <vm/vm_kern.h>
61 #include <vm/vm_extern.h>
62 #include <vm/vm_map.h>
63 #include <vm/vm_page.h>
65 #include <vm/uma_int.h>
66 #include <vm/uma_dbg.h>
68 #include <dde_fbsd/uma.h>
69 #include <l4/dde/ddekit/pgtab.h>
70 #include <l4/dde/ddekit/panic.h>
73 #if defined(INVARIANTS) && defined(__i386__)
74 #include <machine/cpu.h>
78 * When realloc() is called, if the new size is sufficiently smaller than
79 * the old size, realloc() will allocate a new, smaller block to avoid
80 * wasting memory. 'Sufficiently smaller' is defined as: newsize <=
81 * oldsize / 2^n, where REALLOC_FRACTION defines the value of 'n'.
/*
 * Fallback value for REALLOC_FRACTION, used only when nothing has defined it
 * already.  The reuse test in realloc compares the requested size with
 * (alloc >> REALLOC_FRACTION), so 1 means "half of the current block".
 * NOTE(review): the matching #endif is not visible in this copy.
 */
83 #ifndef REALLOC_FRACTION
84 #define REALLOC_FRACTION 1 /* new block if <= half the size */
/*
 * Malloc types defined by this file.  Each MALLOC_DEFINE names the type
 * identifier followed by two descriptive strings (presumably a short name
 * and a longer description -- confirm against the macro's definition).
 */
87 MALLOC_DEFINE(M_CACHE, "cache", "Various Dynamically allocated caches");
88 MALLOC_DEFINE(M_DEVBUF, "devbuf", "device driver memory");
89 MALLOC_DEFINE(M_TEMP, "temp", "misc temporary data buffers");
91 MALLOC_DEFINE(M_IP6OPT, "ip6opt", "IPv6 options");
92 MALLOC_DEFINE(M_IP6NDP, "ip6ndp", "IPv6 Neighbor Discovery");
/* kmeminit is registered through SYSINIT at SI_SUB_KMEM, SI_ORDER_FIRST. */
94 static void kmeminit(void *);
95 SYSINIT(kmem, SI_SUB_KMEM, SI_ORDER_FIRST, kmeminit, NULL)
/* File-local malloc type. */
97 static MALLOC_DEFINE(M_FREE, "free", "should be on free list");
/*
 * Head of the list of registered malloc types.  Entries are chained through
 * ks_next by the registration and unregistration code later in this file.
 */
99 static struct malloc_type *kmemstatistics;
/* Both are passed by address to kmem_suballoc() during allocator setup. */
101 static char *kmembase;
102 static char *kmemlimit;
/*
 * Size-class geometry for small allocations.  The allocation path rounds a
 * request up to a multiple of KMEM_ZBASE (16 == 1 << KMEM_ZSHIFT) and uses
 * the rounded size shifted right by KMEM_ZSHIFT as an index into kmemsize[],
 * which yields the kmemzones[] entry to allocate from.
 */
105 #define KMEM_ZSHIFT 4
106 #define KMEM_ZBASE 16
107 #define KMEM_ZMASK (KMEM_ZBASE - 1)
/*
 * KMEM_ZMAX is the largest request served from the zones (the allocation
 * path tests size <= KMEM_ZMAX).
 * NOTE(review): it is defined twice below; presumably these are the two arms
 * of a preprocessor conditional whose directive lines are not visible in
 * this copy.
 */
110 #define KMEM_ZMAX PAGE_SIZE
112 #define KMEM_ZMAX (PAGE_SIZE - 80)
/* One kmemsize[] slot per KMEM_ZBASE step up to KMEM_ZMAX, plus slot 0. */
114 #define KMEM_ZSIZE (KMEM_ZMAX >> KMEM_ZSHIFT)
115 static u_int8_t kmemsize[KMEM_ZSIZE + 1];
/*
 * Entries of the size-class table that the rest of this file refers to as
 * kmemzones[].  Each visible entry gives a size, a name string and a NULL
 * slot; presumably these are the kz_size, kz_name and kz_zone members, the
 * last one being filled in by uma_zcreate() during initialisation.
 * Entries above 16384 are only compiled in for correspondingly larger
 * PAGE_SIZE values, and a PAGE_SIZE above 65536 is rejected with #error.
 * NOTE(review): the table's declaration, its smaller entries, its terminator
 * and the #endif lines closing the PAGE_SIZE conditionals are not visible in
 * this copy.
 */
117 /* These won't be powers of two for long */
129 {1024, "1024", NULL},
130 {2048, "2048", NULL},
132 {4096, "4096", NULL},
134 {8192, "8192", NULL},
136 {16384, "16384", NULL},
137 #if PAGE_SIZE > 16384
138 {32768, "32768", NULL},
139 #if PAGE_SIZE > 32768
140 {65536, "65536", NULL},
141 #if PAGE_SIZE > 65536
142 #error "Unsupported PAGE_SIZE"
148 #endif /* DDE_FBSD */
/*
 * Read-only (CTLFLAG_RD) sysctl entries under _vm that expose the kmem
 * sizing values computed during allocator initialisation.
 * NOTE(review): the definition of vm_kmem_size itself is not visible in this
 * copy; only vm_kmem_size_max and vm_kmem_size_scale are defined here.
 */
153 SYSCTL_UINT(_vm, OID_AUTO, kmem_size, CTLFLAG_RD, &vm_kmem_size, 0,
154 "Size of kernel memory");
156 u_int vm_kmem_size_max;
157 SYSCTL_UINT(_vm, OID_AUTO, kmem_size_max, CTLFLAG_RD, &vm_kmem_size_max, 0,
158 "Maximum size of kernel memory");
160 u_int vm_kmem_size_scale;
161 SYSCTL_UINT(_vm, OID_AUTO, kmem_size_scale, CTLFLAG_RD, &vm_kmem_size_scale, 0,
162 "Scale factor for kernel memory size");
165 * The malloc_mtx protects the kmemstatistics linked list.
/* Mutex held by this file around walks and updates of the kmemstatistics list. */
168 struct mtx malloc_mtx;
/*
 * Profiling only: krequests[] holds one request counter per size slot and is
 * indexed the same way as kmemsize[] (size >> KMEM_ZSHIFT).
 * NOTE(review): the #endif closing this conditional is not visible in this
 * copy, so it is unclear which of the following lines it covers.
 */
170 #ifdef MALLOC_PROFILE
171 uint64_t krequests[KMEM_ZSIZE + 1];
173 static int sysctl_kern_mprof(SYSCTL_HANDLER_ARGS);
/* Handler behind the kern.malloc statistics sysctl. */
176 static int sysctl_kern_malloc(SYSCTL_HANDLER_ARGS);
178 /* time_uptime of last malloc(9) failure */
179 static time_t t_malloc_fail;
/*
 * Failure-injection knobs, compiled only with MALLOC_MAKE_FAILURES.
 *  - malloc_failure_rate: writable sysctl (CTLFLAG_RW) and boot tunable
 *    "debug.malloc.failure_rate".
 *  - malloc_nowait_count: incremented by the allocation path for each
 *    M_NOWAIT request seen while the rate is non-zero.
 *  - malloc_failure_count: read-only sysctl counting the imposed failures.
 * NOTE(review): the #endif closing this conditional is not visible in this
 * copy.
 */
181 #ifdef MALLOC_MAKE_FAILURES
183 * Causes malloc failures every (n) mallocs with M_NOWAIT. If set to 0,
184 * doesn't cause failures.
186 SYSCTL_NODE(_debug, OID_AUTO, malloc, CTLFLAG_RD, 0,
187 "Kernel malloc debugging options");
189 static int malloc_failure_rate;
190 static int malloc_nowait_count;
191 static int malloc_failure_count;
192 SYSCTL_INT(_debug_malloc, OID_AUTO, failure_rate, CTLFLAG_RW,
193 &malloc_failure_rate, 0, "Every (n) mallocs with M_NOWAIT will fail");
194 TUNABLE_INT("debug.malloc.failure_rate", &malloc_failure_rate);
195 SYSCTL_INT(_debug_malloc, OID_AUTO, failure_count, CTLFLAG_RD,
196 &malloc_failure_count, 0, "Number of imposed M_NOWAIT malloc failures");
/*
 * Returns time_uptime minus t_malloc_fail, i.e. how long ago the allocator
 * last recorded a failure (t_malloc_fail is stamped with time_uptime when a
 * failure is noted).  The result is in the units of time_uptime.
 * NOTE(review): the return-type line and the braces are not visible in this
 * copy.
 */
200 malloc_last_fail(void)
203 return (time_uptime - t_malloc_fail);
207 * Add this to the informational malloc_type bucket.
/*
 * Charges an allocation of `size` bytes to malloc type `ksp`; all updates
 * happen with ksp->ks_mtx held.
 * NOTE(review): the declaration of the third parameter is not visible in
 * this copy; the body refers to it as zindx.
 */
210 malloc_type_zone_allocated(struct malloc_type *ksp, unsigned long size,
213 mtx_lock(&ksp->ks_mtx);
/*
 * Mark size class zindx as used in the ks_size bitmask.
 * NOTE(review): malloc_type_allocated() passes -1 as zindx, and shifting by
 * a negative count is undefined.  The listing numbers skip between the lock
 * and this line, so a guard may simply be missing from this copy -- confirm
 * against the full source.
 */
216 ksp->ks_size |= 1 << zindx;
218 ksp->ks_memuse += size;
/* Keep ks_maxused as the high-water mark of ks_memuse. */
220 if (ksp->ks_memuse > ksp->ks_maxused)
221 ksp->ks_maxused = ksp->ks_memuse;
223 mtx_unlock(&ksp->ks_mtx);
/*
 * Accounting wrapper for allocations that have no size-class index:
 * forwards to malloc_type_zone_allocated() with -1 as the index argument.
 */
227 malloc_type_allocated(struct malloc_type *ksp, unsigned long size)
229 malloc_type_zone_allocated(ksp, size, -1);
233 * Remove this allocation from the informational malloc_type bucket.
/*
 * Reverses the accounting for `size` bytes previously charged to `ksp`.
 * With ks_mtx held, asserts that the type has at least `size` bytes in use
 * and then subtracts `size` from ks_memuse.
 */
236 malloc_type_freed(struct malloc_type *ksp, unsigned long size)
238 mtx_lock(&ksp->ks_mtx);
/* A larger size than is in use points at a free with the wrong type. */
239 KASSERT(size <= ksp->ks_memuse,
240 ("malloc(9)/free(9) confusion.\n%s",
241 "Probably freeing with wrong type, but maybe not here."));
242 ksp->ks_memuse -= size;
244 mtx_unlock(&ksp->ks_mtx);
250 * Allocate a block of memory.
252 * If M_NOWAIT is set, this routine will not block and will return NULL if
253 * the allocation fails.
/*
 * Allocation entry point, taking a size, a malloc type and M_* flags.
 * Requests up to KMEM_ZMAX are served from a per-size-class UMA zone; larger
 * ones are rounded up to whole pages and passed to a large-allocation
 * routine.  Either way the result is charged to `type`.
 * NOTE(review): two name lines (malloc and bsd_malloc) and several pairs of
 * alternative statements appear below.  Presumably each pair belongs to the
 * two arms of a preprocessor conditional whose directive lines, like the
 * braces and most declarations, are not visible in this copy.
 */
257 malloc(size, type, flags)
259 bsd_malloc(size, type, flags)
262 struct malloc_type *type;
/* Remember the caller's size; `size` itself is rounded up further down. */
272 unsigned long osize = size;
277 * To make sure that WAITOK or NOWAIT is set, but not more than
278 * one, and check against the API botches that are common.
/*
 * Only the wait-related bits are examined; any combination other than
 * exactly M_NOWAIT or exactly M_WAITOK is reported.
 */
280 indx = flags & (M_WAITOK | M_NOWAIT | M_DONTWAIT | M_TRYWAIT);
281 if (indx != M_NOWAIT && indx != M_WAITOK) {
285 printf("Bad malloc flags: %x\n", indx);
286 ddekit_debug("check backtrace");
/* Alternative report path: the message is gated by ppsratecheck() and `once`. */
291 static struct timeval lasterr;
292 static int curerr, once;
293 if (once == 0 && ppsratecheck(&lasterr, &curerr, 1)) {
294 printf("Bad malloc flags: %x\n", indx);
/* NOTE(review): the condition guarding this debugger entry is not visible. */
304 kdb_enter("zero size malloc");
/*
 * Failure injection: every malloc_failure_rate-th M_NOWAIT request bumps the
 * failure counter and stamps t_malloc_fail.
 * NOTE(review): the statement that actually fails the request is not visible.
 */
306 #ifdef MALLOC_MAKE_FAILURES
307 if ((flags & M_NOWAIT) && (malloc_failure_rate != 0)) {
308 atomic_add_int(&malloc_nowait_count, 1);
309 if ((malloc_nowait_count % malloc_failure_rate) == 0) {
310 atomic_add_int(&malloc_failure_count, 1);
311 t_malloc_fail = time_uptime;
/* M_WAITOK is asserted never to be used at interrupt nesting level > 0. */
316 if (flags & M_WAITOK)
317 KASSERT(curthread->td_intr_nesting_level == 0,
318 ("malloc(M_WAITOK) in interrupt context"));
/*
 * Small request: round up to the next multiple of KMEM_ZBASE, look up the
 * size class through kmemsize[] and allocate from that class's zone.
 */
319 if (size <= KMEM_ZMAX) {
320 if (size & KMEM_ZMASK)
321 size = (size & ~KMEM_ZMASK) + KMEM_ZBASE;
322 indx = kmemsize[size >> KMEM_ZSHIFT];
323 zone = kmemzones[indx].kz_zone;
327 #ifdef MALLOC_PROFILE
328 krequests[size >> KMEM_ZSHIFT]++;
330 va = uma_zalloc(zone, flags);
/* A failed allocation is accounted with a size of 0. */
337 malloc_type_zone_allocated(type, va == NULL ? 0 : size, indx);
/* Large request: whole pages, no size-class index. */
339 size = roundup(size, PAGE_SIZE);
343 va = uma_large_malloc(size, flags);
345 va = bsd_large_malloc(size, flags);
347 malloc_type_allocated(type, va == NULL ? 0 : size);
349 if (flags & M_WAITOK)
350 KASSERT(va != NULL, ("malloc(M_WAITOK) returned NULL"));
/* NOTE(review): the condition guarding this timestamp update is not visible. */
352 t_malloc_fail = time_uptime;
/*
 * Unless the caller asked for M_ZERO, fill the originally requested bytes
 * with the pattern 0x70.
 */
354 if (va != NULL && !(flags & M_ZERO)) {
355 memset(va, 0x70, osize);
358 return ((void *) va);
364 * Free a block of memory allocated by malloc.
366 * This routine may not block.
/*
 * Body of the release routine.  It determines the size and origin of the
 * block at `addr`, hands it back to the matching backend and finally
 * subtracts its size from `type`'s accounting through malloc_type_freed().
 * NOTE(review): the function's name line, its braces, and the #if, else and
 * case lines separating the UMA-slab variant from the ddekit page-table
 * variant are not visible in this copy.
 */
375 struct malloc_type *type;
384 /* free(NULL, ...) does nothing */
/* A type with nothing in use cannot be the owner of this block. */
388 KASSERT(type->ks_memuse > 0,
389 ("malloc(9)/free(9) confusion.\n%s",
390 "Probably freeing with wrong type, but maybe not here."));
/* Slab lookup uses the address with the UMA_SLAB_MASK bits cleared. */
394 slab = vtoslab((vm_offset_t)addr & (~UMA_SLAB_MASK));
/* NOTE(review): the condition guarding this panic is not visible. */
397 panic("free: address %p(%p) has not been allocated.\n",
398 addr, (void *)((u_long)addr & (~UMA_SLAB_MASK)));
/* Slab without UMA_SLAB_MALLOC: a zone item whose size is the keg's uk_size. */
401 if (!(slab->us_flags & UMA_SLAB_MALLOC)) {
403 struct malloc_type **mtp = addr;
405 size = slab->us_keg->uk_size;
408 * Cache a pointer to the malloc_type that most recently freed
409 * this memory here. This way we know who is most likely to
410 * have stepped on it later.
412 * This code assumes that size is a multiple of 8 bytes for
/*
 * Clear the UMA_ALIGN_PTR bits of the address, then advance to the last
 * pointer-sized slot inside the item.
 * NOTE(review): the store through mtp is not visible in this copy.
 */
415 mtp = (struct malloc_type **)
416 ((unsigned long)mtp & ~UMA_ALIGN_PTR);
417 mtp += (size - sizeof(struct malloc_type *)) /
418 sizeof(struct malloc_type *);
421 uma_zfree_arg(LIST_FIRST(&slab->us_keg->uk_zones), addr, slab);
/* Large allocation: the size comes from the slab itself. */
423 size = slab->us_size;
424 uma_large_free(slab);
/* ddekit variant: the page-table entry type selects the release path. */
427 switch (ddekit_pgtab_get_type(addr)) {
429 // size was small enough to use uma
430 zone = uma_getzone(addr);
431 KASSERT(zone->flags&UMA_ZONE_MALLOC, ("free called on uma allocated item"));
433 uma_zfree(zone, addr);
436 // we had to use large_malloc, so now use large_free
437 zone = uma_getzone(addr);
438 size = ddekit_pgtab_get_size(addr);
439 bsd_large_free(addr);
442 ddekit_debug("unknown memory class");
445 #endif /* DDE_FBSD */
447 malloc_type_freed(type, size);
451 * realloc: change the size of a memory block
/*
 * Resizes the block at `addr` to `size` bytes for malloc type `type`.
 * A NULL addr is treated as a plain malloc().  Otherwise the current block
 * size is looked up, the block is kept when the reuse test passes, and if
 * not a new block is allocated and min(size, alloc) bytes are copied over.
 * NOTE(review): two name lines (realloc and bsd_realloc) are visible,
 * presumably the arms of a conditional.  The braces, declarations, the first
 * half of the reuse test, the release of the old block and the return
 * statements are not visible in this copy.
 */
455 realloc(addr, size, type, flags)
457 bsd_realloc(addr, size, type, flags)
461 struct malloc_type *type;
468 #endif /* DDE_FBSD */
472 /* realloc(NULL, ...) is equivalent to malloc(...) */
474 return (malloc(size, type, flags));
/* Slab lookup uses the address with the UMA_SLAB_MASK bits cleared. */
477 slab = vtoslab((vm_offset_t)addr & ~(UMA_SLAB_MASK));
480 KASSERT(slab != NULL,
481 ("realloc: address %p out of range", (void *)addr));
483 /* Get the size of the original block */
/* Two sources are visible: the keg's item size or the slab's own size. */
485 alloc = slab->us_keg->uk_size;
487 alloc = slab->us_size;
/* ddekit variant: the size is derived from the page-table entry type. */
490 switch (ddekit_pgtab_get_type(addr)) {
492 // was allocated with uma
493 zone = uma_getzone(addr);
497 // had to use large_malloc
498 alloc = ddekit_pgtab_get_size(addr);
501 #endif /* DDE_FBSD */
/*
 * Visible part of the reuse test: the new size is more than
 * alloc >> REALLOC_FRACTION, or the block already has MINALLOCSIZE.
 */
504 /* Reuse the original block if appropriate */
506 && (size > (alloc >> REALLOC_FRACTION) || alloc == MINALLOCSIZE))
509 /* Allocate a new, bigger (or smaller) block */
510 if ((newaddr = malloc(size, type, flags)) == NULL)
513 /* Copy over original contents */
514 bcopy(addr, newaddr, min(size, alloc));
520 * reallocf: same as realloc() but free memory on failure.
/*
 * Calls realloc() with the same four arguments and tests the result against
 * NULL.
 * NOTE(review): two name lines (reallocf and bsd_reallocf) are visible,
 * presumably the arms of a conditional.  The failure branch and the return
 * statement are not visible in this copy; the header comment in the file
 * says the original block is freed on failure.
 */
524 reallocf(addr, size, type, flags)
526 bsd_reallocf(addr, size, type, flags)
530 struct malloc_type *type;
535 if ((mem = realloc(addr, size, type, flags)) == NULL)
541 * Initialize the kernel memory allocator
/*
 * Body of the allocator initialisation routine.  It initialises malloc_mtx,
 * computes vm_kmem_size and creates the kmem submap (the part that ends at
 * the DDE_FBSD #endif), then creates one UMA zone per kmemzones[] entry and
 * fills in the kmemsize[] lookup table.
 * NOTE(review): the function's name line, braces, local declarations and
 * several preprocessor directives are not visible in this copy.
 */
554 mtx_init(&malloc_mtx, "malloc", NULL, MTX_DEF);
558 * Try to auto-tune the kernel memory size, so that it is
559 * more applicable for a wider range of machine sizes.
560 * On an X86, a VM_KMEM_SIZE_SCALE value of 4 is good, while
561 * a VM_KMEM_SIZE of 12MB is a fair compromise. The
562 * VM_KMEM_SIZE_MAX is dependent on the maximum KVA space
563 * available, and on an X86 with a total KVA space of 256MB,
564 * try to keep VM_KMEM_SIZE_MAX at 80MB or below.
566 * Note that the kmem_map is also used by the zone allocator,
567 * so make sure that there is enough space.
/* Starting point: the compile-time base plus one page per mbuf cluster. */
569 vm_kmem_size = VM_KMEM_SIZE + nmbclusters * PAGE_SIZE;
570 mem_size = cnt.v_page_count;
572 #if defined(VM_KMEM_SIZE_SCALE)
573 vm_kmem_size_scale = VM_KMEM_SIZE_SCALE;
/*
 * With a positive scale factor, grow vm_kmem_size to (pages / scale) pages
 * when that exceeds the current size expressed in pages.
 */
575 TUNABLE_INT_FETCH("vm.kmem_size_scale", &vm_kmem_size_scale);
576 if (vm_kmem_size_scale > 0 &&
577 (mem_size / vm_kmem_size_scale) > (vm_kmem_size / PAGE_SIZE))
578 vm_kmem_size = (mem_size / vm_kmem_size_scale) * PAGE_SIZE;
580 #if defined(VM_KMEM_SIZE_MAX)
581 vm_kmem_size_max = VM_KMEM_SIZE_MAX;
/* Clamp to the configured maximum, if one is set. */
583 TUNABLE_INT_FETCH("vm.kmem_size_max", &vm_kmem_size_max);
584 if (vm_kmem_size_max > 0 && vm_kmem_size >= vm_kmem_size_max)
585 vm_kmem_size = vm_kmem_size_max;
587 /* Allow final override from the kernel environment */
/* The old tunable name is still honoured but triggers a rename notice. */
589 if (TUNABLE_INT_FETCH("kern.vm.kmem.size", &vm_kmem_size) != 0)
590 printf("kern.vm.kmem.size is now called vm.kmem_size!\n");
592 TUNABLE_INT_FETCH("vm.kmem_size", &vm_kmem_size);
595 * Limit kmem virtual size to twice the physical memory.
596 * This allows for kmem map sparseness, but limits the size
597 * to something sane. Be careful to not overflow the 32bit
598 * ints while doing the check.
600 if (((vm_kmem_size / 2) / PAGE_SIZE) > cnt.v_page_count)
601 vm_kmem_size = 2 * cnt.v_page_count * PAGE_SIZE;
604 * Tune settings based on the kernel map's size at this time.
606 init_param3(vm_kmem_size / PAGE_SIZE);
/* kmembase and kmemlimit receive the bounds of the new submap. */
608 kmem_map = kmem_suballoc(kernel_map, (vm_offset_t *)&kmembase,
609 (vm_offset_t *)&kmemlimit, vm_kmem_size);
610 kmem_map->system_map = 1;
613 #endif /* DDE_FBSD */
/*
 * Create a zone for every table entry up to the kz_size == 0 terminator.
 * The inner loop points every kmemsize[] slot up to the entry's size at this
 * entry; i is not reset between entries, so each slot is written only once.
 * NOTE(review): two alternative lines of ctor/dtor/init/fini arguments are
 * visible in the uma_zcreate() call (mtrash_* hooks or NULLs); presumably
 * they are selected by a conditional that is not visible in this copy.
 */
615 for (i = 0, indx = 0; kmemzones[indx].kz_size != 0; indx++) {
616 int size = kmemzones[indx].kz_size;
617 char *name = kmemzones[indx].kz_name;
619 kmemzones[indx].kz_zone = uma_zcreate(name, size,
621 mtrash_ctor, mtrash_dtor, mtrash_init, mtrash_fini,
623 NULL, NULL, NULL, NULL,
625 UMA_ALIGN_PTR, UMA_ZONE_MALLOC);
627 for (;i <= size; i+= KMEM_ZBASE)
628 kmemsize[i >> KMEM_ZSHIFT] = indx;
/*
 * Body of the malloc-type registration routine (its panic message names it
 * malloc_init).  `data` is the struct malloc_type to register: after sanity
 * checks it is pushed onto the front of the kmemstatistics list and its
 * per-type mutex is initialised, all with malloc_mtx held.
 * NOTE(review): the function's name line and braces are not visible in this
 * copy.
 */
637 struct malloc_type *type = (struct malloc_type *)data;
639 mtx_lock(&malloc_mtx);
/* The type must carry M_MAGIC, and the VM page count must already be set. */
640 if (type->ks_magic != M_MAGIC)
641 panic("malloc type lacks magic");
643 if (cnt.v_page_count == 0)
644 panic("malloc_init not allowed before vm init");
/*
 * A non-NULL ks_next means the type is already linked.
 * NOTE(review): the statement controlled by this test is not visible.  If it
 * is an early return, malloc_mtx would still be held on that path -- confirm
 * against the full source.
 */
646 if (type->ks_next != NULL)
649 type->ks_next = kmemstatistics;
650 kmemstatistics = type;
651 mtx_init(&type->ks_mtx, type->ks_shortdesc, "Malloc Stats", MTX_DEF);
652 mtx_unlock(&malloc_mtx);
/*
 * Body of the malloc-type unregistration routine (its panic message names it
 * malloc_uninit).  `data` is the struct malloc_type to remove: it is
 * unlinked from the kmemstatistics list, its ks_next is cleared and its
 * per-type mutex is destroyed, with malloc_mtx held throughout.
 * NOTE(review): the function's name line, braces and some lines inside the
 * unlink logic are not visible in this copy.
 */
659 struct malloc_type *type = (struct malloc_type *)data;
660 struct malloc_type *t;
662 mtx_lock(&malloc_mtx);
663 mtx_lock(&type->ks_mtx);
664 if (type->ks_magic != M_MAGIC)
665 panic("malloc type lacks magic");
667 if (cnt.v_page_count == 0)
668 panic("malloc_uninit not allowed before vm init");
/* Removing the head just advances the list head. */
670 if (type == kmemstatistics)
671 kmemstatistics = type->ks_next;
/*
 * Otherwise find the predecessor and splice the type out.
 * NOTE(review): this walk dereferences t->ks_next on its first iteration, so
 * it needs a non-empty list; presumably it is the else arm of the head test
 * (the connecting line is not visible).
 */
673 for (t = kmemstatistics; t->ks_next != NULL; t = t->ks_next) {
674 if (t->ks_next == type) {
675 t->ks_next = type->ks_next;
680 type->ks_next = NULL;
/*
 * NOTE(review): ks_mtx was locked above and no unlock is visible before it
 * is destroyed here -- confirm that mtx_destroy() accepts a held mutex.
 */
681 mtx_destroy(&type->ks_mtx);
682 mtx_unlock(&malloc_mtx);
/*
 * Handler for the kern.malloc sysctl registered at the end of this block.
 * It formats one text line per malloc type that has been used (name, memory
 * in use and high-water mark in KiB, call count, and the names of the size
 * classes used) into a temporary buffer and copies it out with SYSCTL_OUT().
 * NOTE(review): the return type, braces, local declarations, the pointer
 * and length bookkeeping between snprintf() calls, the release of the buffer
 * and the return are not visible in this copy.
 */
686 sysctl_kern_malloc(SYSCTL_HANDLER_ARGS)
688 struct malloc_type *type;
/*
 * First walk of the type list under malloc_mtx.
 * NOTE(review): the loop body is not visible; the use of cnt right after
 * suggests it counts the entries.
 */
702 mtx_lock(&malloc_mtx);
703 for (type = kmemstatistics; type != NULL; type = type->ks_next)
/*
 * The lock is released before the M_WAITOK allocation and taken again after
 * it.  The buffer holds one line per counted type plus one for the header.
 */
706 mtx_unlock(&malloc_mtx);
707 bufsize = linesize * (cnt + 1);
708 p = buf = (char *)malloc(bufsize, M_TEMP, M_WAITOK|M_ZERO);
709 mtx_lock(&malloc_mtx);
711 len = snprintf(p, linesize,
712 "\n Type InUse MemUse HighUse Requests Size(s)\n");
/*
 * Second walk, bounded by cnt (the count the buffer was sized from) as well
 * as by the end of the list.
 */
715 for (type = kmemstatistics; cnt != 0 && type != NULL;
716 type = type->ks_next, cnt--) {
/* NOTE(review): the statement controlled by this test is not visible. */
717 if (type->ks_calls == 0)
720 curline = linesize - 2; /* Leave room for the \n */
/* ks_memuse and ks_maxused are printed rounded up to whole KiB. */
721 len = snprintf(p, curline, "%13s%6lu%6luK%7luK%9llu",
724 (type->ks_memuse + 1023) / 1024,
725 (type->ks_maxused + 1023) / 1024,
726 (long long unsigned)type->ks_calls);
/*
 * Append the name of every size class whose bit is set in ks_size; the
 * last slot of kmemzones[] is excluded from the scan.
 */
731 for (i = 0; i < sizeof(kmemzones) / sizeof(kmemzones[0]) - 1;
733 if (type->ks_size & (1 << i)) {
735 len = snprintf(p, curline, " ");
737 len = snprintf(p, curline, ",");
741 len = snprintf(p, curline,
742 "%s", kmemzones[i].kz_name);
750 len = snprintf(p, 2, "\n");
754 mtx_unlock(&malloc_mtx);
/* Only the bytes actually written (p - buf) are copied out. */
755 error = SYSCTL_OUT(req, buf, p - buf);
/* Registers the handler as a read-only string node under _kern. */
761 SYSCTL_OID(_kern, OID_AUTO, malloc, CTLTYPE_STRING|CTLFLAG_RD,
762 NULL, 0, sysctl_kern_malloc, "A", "Malloc Stats");
/*
 * Profiling-only handler for the kern.mprof sysctl registered at the end of
 * this block.  For every size slot it prints the slot's size, the number of
 * requests recorded in krequests[] and the item size of the zone serving
 * that slot, followed by totals of memory used and memory wasted.
 * NOTE(review): the return type, braces, local declarations, the pointer
 * and length bookkeeping between snprintf() calls, the release of the buffer
 * and the return are not visible in this copy.
 */
764 #ifdef MALLOC_PROFILE
767 sysctl_kern_mprof(SYSCTL_HANDLER_ARGS)
/* One line per size slot, plus fixed room for the banner and totals. */
782 bufsize = linesize * (KMEM_ZSIZE + 1);
783 bufsize += 128; /* For the stats line */
784 bufsize += 128; /* For the banner line */
788 p = buf = (char *)malloc(bufsize, M_TEMP, M_WAITOK|M_ZERO);
789 len = snprintf(p, bufsize,
790 "\n Size Requests Real Size\n");
794 for (i = 0; i < KMEM_ZSIZE; i++) {
/* size is the slot's nominal size; rsize is the serving zone's item size. */
795 size = i << KMEM_ZSHIFT;
796 rsize = kmemzones[kmemsize[i]].kz_size;
797 count = (long long unsigned)krequests[i];
799 len = snprintf(p, bufsize, "%6d%28llu%11d\n",
800 size, (unsigned long long)count, rsize);
/* Waste is (rsize - size) per request; total memory counts rsize per request. */
804 if ((rsize * count) > (size * count))
805 waste += (rsize * count) - (size * count);
806 mem += (rsize * count);
809 len = snprintf(p, bufsize,
810 "\nTotal memory used:\t%30llu\nTotal Memory wasted:\t%30llu\n",
811 (unsigned long long)mem, (unsigned long long)waste);
/* Only the bytes actually written (p - buf) are copied out. */
814 error = SYSCTL_OUT(req, buf, p - buf);
/* Registers the handler as a read-only string node under _kern. */
820 SYSCTL_OID(_kern, OID_AUTO, mprof, CTLTYPE_STRING|CTLFLAG_RD,
821 NULL, 0, sysctl_kern_mprof, "A", "Malloc Profiling");
822 #endif /* MALLOC_PROFILE */