mm/slab.c
35266 /*
35267 * linux/mm/slab.c
35268 * Written by Mark Hemment, 1996/97.
35269 * (markhe@nextd.demon.co.uk)
35270 *
35271 * 11 April '97. Started multi-threading - markhe
35272 * The global cache-chain is protected by the
35273 * semaphore 'cache_chain_sem'. The sem is only
35274 * needed when accessing/extending the cache-chain,
35275 * which can never happen inside an interrupt
35276 * (kmem_cache_create(), kmem_cache_shrink() and
35277 * kmem_cache_reap()). This is a medium-term
35278 * exclusion lock.
35279 *
35280 * Each cache has its own lock; 'c_spinlock'. This
35281 * lock is needed only when accessing non-constant
35282 * members of a cache-struct. Note: 'constant
35283 * members' are assigned a value in
35284 * kmem_cache_create() before the cache is linked
35285 * into the cache-chain. The values never change,
35286 * so not even a multi-reader lock is needed for
35287 * these members. The c_spinlock is only ever held
35288 * for a few cycles.
35289 *
35290 * To prevent kmem_cache_shrink() trying to shrink a
35291 * 'growing' cache (which may be sleeping and
35292 * therefore not holding the semaphore/lock), the
35293 * c_growing field is used. This also prevents
35294 * reaping from a cache.
35295 *
35296 * Note, caches can _never_ be destroyed. When a
35297 * sub-system (eg module) has finished with a cache,
35298 * it can only be shrunk. This leaves the cache
35299 * empty, but already enabled for re-use, eg. during
35300 * a module re-load.
35301 *
35302 * Notes:
35303 * o Constructors/deconstructors are called
35304 * while the cache-lock is _not_ held.
35305 * Therefore they _must_ be threaded.
35306 * o Constructors must not attempt to
35307 * allocate memory from the same cache that
35308 * they are a constructor for - infinite
35309 * loop! (There is no easy way to trap
35310 * this.)
35311 * o The per-cache locks must be obtained
35312 * with local-interrupts disabled.
35313 * o When compiled with debug support, and an
35314 * object-verify (upon release) is requested
35315 * for a cache, the verify-function is
35316 * called with the cache lock held. This
35317 * helps debugging.
35318 * o The functions called from
35319 * try_to_free_page() must not attempt to
35320 * allocate memory from a cache which is
35321 * being grown. The buffer sub-system might
35322 * try to allocate memory, via
35323 * buffer_cachep. As this priority is passed to
35324 * the SLAB, and then (if necessary) onto
35325 * the gfp() funcs (which avoid calling
35326 * try_to_free_page()), no deadlock should
35327 * happen.
35328 *
35329 * The positioning of the per-cache lock is tricky.
35330 * If the lock is placed on the same h/w cache line
35331 * as commonly accessed members the number of L1
35332 * cache-line faults is reduced. However, this can
35333 * lead to the cache-line ping-ponging between
35334 * processors when the lock is in contention (and
35335 * the common members are being accessed). Decided
35336 * to keep it away from common members.
35337 *
35338 * More fine-graining is possible, with per-slab
35339 * locks...but this might be taking fine-graining
35340 * too far. It would, however, have the advantage:
35341 *
35342 * During most allocs/frees no writes occur
35343 * to the cache-struct. Therefore a
35344 * multi-reader/one writer lock could be
35345 * used (the writer needed when the slab
35346 * chain is being linked/unlinked). As we
35347 * would not have an exclusion lock for the
35348 * cache-structure, one would be needed
35349 * per-slab (for updating s_free ptr, and/or
35350 * the contents of s_index).
35351 *
35352 * The above locking would allow parallel operations
35353 * to different slabs within the same cache with
35354 * reduced spinning.
35355 *
35356 * Per-engine slab caches, backed by a global cache
35357 * (as in Mach's Zone allocator), would allow most
35358 * allocations from the same cache to execute in
35359 * parallel.
35360 *
35361 * At present, each engine can be growing a cache.
35362 * This should be blocked.
35363 *
35364 * It is not currently 100% safe to examine the
35365 * page_struct outside of a kernel or global cli
35366 * lock. The risk is v. small, and non-fatal.
35367 *
35368 * Calls to printk() are not 100% safe (the function
35369 * is not threaded). However, printk() is only used
35370 * under an error condition, and the risk is
35371 * v. small (not sure if the console write functions
35372 * 'enjoy' executing multiple contexts in parallel.
35373 * I guess they don't...). Note, for most calls to
35374 * printk() any held cache-lock is dropped. This is
35375 * not always done, for text-size reasons - having
35376 * *_unlock() everywhere is bloat. */
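/* A minimal summary sketch of the two-level locking pattern
 * described above, as it appears in the functions below:
 *
 *	down(&cache_chain_sem);
 *	... walk or modify the chain of caches ...
 *	up(&cache_chain_sem);
 *
 *	spin_lock_irqsave(&cachep->c_spinlock, save_flags);
 *	... touch non-constant members of *cachep ...
 *	spin_unlock_irqrestore(&cachep->c_spinlock,
 *	                       save_flags);
 */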
35377
35378 /* An implementation of the Slab Allocator as described
35379 * in outline in;
35380 * UNIX Internals: The New Frontiers by Uresh Vahalia
35381 * Pub: Prentice Hall ISBN 0-13-101908-2
35382 * or with a little more detail in;
35383 * The Slab Allocator: An Object-Caching Kernel
35384 * Memory Allocator
35385 * Jeff Bonwick (Sun Microsystems).
35386 * Presented at: USENIX Summer 1994 Technical
35387 * Conference */
35388
35389 /* This implementation deviates from Bonwick's paper as
35390 * it does not use a hash-table for large objects, but
35391 * rather a per slab index to hold the bufctls. This
35392 * allows the bufctl structure to be small (one word),
35393 * but limits the number of objects a slab (not a cache)
35394 * can contain when off-slab bufctls are used. The limit
35395 * is the size of the largest general cache that does not
35396 * use off-slab bufctls, divided by the size of a bufctl.
35397 * For 32bit archs, this is 256/4 = 64. This is not
35398 * serious, as it is only for large objects, when it is
35399 * unwise to have too many per slab.
35400 *
35401 * Note: This limit can be raised by introducing a
35402 * general cache whose size is less than 512
35403 * (PAGE_SIZE>>3), but greater than 256. */
35404
35405 #include <linux/config.h>
35406 #include <linux/slab.h>
35407 #include <linux/interrupt.h>
35408 #include <linux/init.h>
35409
35410 /* If there is a different PAGE_SIZE around, and it works
35411 * with this allocator, then change the following. */
35412 #if (PAGE_SIZE != 8192 && PAGE_SIZE != 4096)
35413 #error Your page size is probably not correctly \
35414 supported - please check
35415 #endif
35416
35417 /* SLAB_MGMT_CHECKS - 1 to enable extra checks in
35418 * kmem_cache_create().
35419 * 0 if you wish to reduce memory
35420 * usage.
35421 *
35422 * SLAB_DEBUG_SUPPORT - 1 for kmem_cache_create() to
35423 * honour; SLAB_DEBUG_FREE,
35424 * SLAB_DEBUG_INITIAL,
35425 * SLAB_RED_ZONE & SLAB_POISON.
35426 * 0 for faster, smaller, code
35427 * (especially in the critical
35428 * paths).
35429 *
35430 * SLAB_STATS - 1 to collect stats for
35431 * /proc/slabinfo.
35432 * 0 for faster, smaller, code
35433 * (especially in the critical
35434 * paths).
35435 *
35436 * SLAB_SELFTEST - 1 to perform a few tests, mainly
35437 * for development. */
35438 #define SLAB_MGMT_CHECKS 1
35439 #define SLAB_DEBUG_SUPPORT 0
35440 #define SLAB_STATS 0
35441 #define SLAB_SELFTEST 0
35442
35443 /* Shouldn't this be in a header file somewhere? */
35444 #define BYTES_PER_WORD sizeof(void *)
35445
35446 /* Legal flag mask for kmem_cache_create(). */
35447 #if SLAB_DEBUG_SUPPORT
35448 #if 0
35449 #define SLAB_C_MASK \
35450 (SLAB_DEBUG_FREE|SLAB_DEBUG_INITIAL|SLAB_RED_ZONE| \
35451 SLAB_POISON|SLAB_HWCACHE_ALIGN|SLAB_NO_REAP| \
35452 SLAB_HIGH_PACK)
35453 #endif
35454 #define SLAB_C_MASK \
35455 (SLAB_DEBUG_FREE|SLAB_DEBUG_INITIAL|SLAB_RED_ZONE| \
35456 SLAB_POISON|SLAB_HWCACHE_ALIGN|SLAB_NO_REAP)
35457 #else
35458 #if 0
35459 #define SLAB_C_MASK \
35460 (SLAB_HWCACHE_ALIGN|SLAB_NO_REAP|SLAB_HIGH_PACK)
35461 #endif
35462 #define SLAB_C_MASK \
35463 (SLAB_HWCACHE_ALIGN|SLAB_NO_REAP)
35464 #endif /* SLAB_DEBUG_SUPPORT */
35465
35466 /* Slab management struct. Manages the objs in a slab.
35467 * Placed either at the end of mem allocated for a slab,
35468 * or from an internal obj cache (cache_slabp). Slabs
35469 * are chained into a partially ordered list; fully used
35470 * first, partial next, and then fully free slabs. The
35471 * first 4 members are referenced during an alloc/free
35472 * operation, and should always appear on the same cache
35473 * line. Note: The offset between some members _must_
35474 * match offsets within the kmem_cache_t - see
35475 * kmem_cache_init() for the checks. */
35476
35477 /* could make this larger for 64bit archs */
35478 #define SLAB_OFFSET_BITS 16
35479
35480 typedef struct kmem_slab_s {
35481 /* ptr to first inactive obj in slab */
35482 struct kmem_bufctl_s *s_freep;
35483 struct kmem_bufctl_s *s_index;
35484 unsigned long s_magic;
35485 /* num of objs active in slab */
35486 unsigned long s_inuse;
35487
35488 struct kmem_slab_s *s_nextp;
35489 struct kmem_slab_s *s_prevp;
35490 /* addr of first obj in slab */
35491 void *s_mem;
35492 unsigned long s_offset:SLAB_OFFSET_BITS,
35493 s_dma:1;
35494 } kmem_slab_t;
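/* A minimal sketch of the partial ordering described above:
 * fully-used slabs first, then partially used, then fully
 * free, with c_freep pointing at the first slab that still
 * has inactive objs (or at kmem_slab_end() if none):
 *
 *	c_firstp -> [full] -> [full] -> [partial] -> [free]
 *	                                 ^               |
 *	                              c_freep         c_lastp
 */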
35495
35496 /* When the slab management is on-slab, this gives the
35497 * size to use. */
35498 #define slab_align_size \
35499 (L1_CACHE_ALIGN(sizeof(kmem_slab_t)))
35500
35501 /* Test for end of slab chain. */
35502 #define kmem_slab_end(x) \
35503 ((kmem_slab_t*)&((x)->c_offset))
35504
35505 /* s_magic */
35506 #define SLAB_MAGIC_ALLOC 0xA5C32F2BUL /* alive */
35507 #define SLAB_MAGIC_DESTROYED 0xB2F23C5AUL /* destroyed */
35508
35509 /* Bufctl's are used for linking objs within a slab,
35510 * identifying what slab an obj is in, and the address of
35511 * the associated obj (for sanity checking with off-slab
35512 * bufctls). What a bufctl contains depends upon the
35513 * state of the obj and the organisation of the cache. */
35514 typedef struct kmem_bufctl_s {
35515 union {
35516 struct kmem_bufctl_s *buf_nextp;
35517 kmem_slab_t *buf_slabp; /* slab for obj */
35518 void * buf_objp;
35519 } u;
35520 } kmem_bufctl_t;
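/* How the union is used (a summary of the alloc/free paths
 * below): while an obj is free, buf_nextp links its bufctl
 * on the slab's freelist; once the obj is allocated,
 * buf_slabp records the owning slab (on-slab bufctls), or
 * buf_objp records the obj's address (off-slab bufctls held
 * in s_index). */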
35521
35522 /* ...shorthand... */
35523 #define buf_nextp u.buf_nextp
35524 #define buf_slabp u.buf_slabp
35525 #define buf_objp u.buf_objp
35526
35527 #if SLAB_DEBUG_SUPPORT
35528 /* Magic nums for obj red zoning. Placed in the first
35529 * word before and the first word after an obj. */
35530 #define SLAB_RED_MAGIC1 0x5A2CF071UL /* obj active */
35531 #define SLAB_RED_MAGIC2 0x170FC2A5UL /* obj inactive */
35532
35533 /* ...and for poisoning */
35534 #define SLAB_POISON_BYTE 0x5a /* byte val for poisoning*/
35535 #define SLAB_POISON_END 0xa5 /* end-byte of poisoning */
35536
35537 #endif /* SLAB_DEBUG_SUPPORT */
35538
35539 /* Cache struct - manages a cache. First four members
35540 * are commonly referenced during an alloc/free
35541 * operation. */
35542 struct kmem_cache_s {
35543 kmem_slab_t *c_freep; /* first w/ free objs */
35544 unsigned long c_flags; /* constant flags */
35545 unsigned long c_offset;
35546 unsigned long c_num; /* # of objs per slab */
35547
35548 unsigned long c_magic;
35549 unsigned long c_inuse; /* kept at zero */
35550 kmem_slab_t *c_firstp; /* first slab in chain */
35551 kmem_slab_t *c_lastp; /* last slab in chain */
35552
35553 spinlock_t c_spinlock;
35554 unsigned long c_growing;
35555 unsigned long c_dflags; /* dynamic flags */
35556 size_t c_org_size;
35557 unsigned long c_gfporder; /* ord pgs per slab (2^n) */
35558 /* constructor func */
35559 void (*c_ctor)(void *, kmem_cache_t *, unsigned long);
35560 /* de-constructor func */
35561 void (*c_dtor)(void *, kmem_cache_t *, unsigned long);
35562 unsigned long c_align; /* alignment of objs */
35563 size_t c_colour; /* cache coloring range*/
35564 size_t c_colour_next; /* cache coloring */
35565 unsigned long c_failures;
35566 const char *c_name;
35567 struct kmem_cache_s *c_nextp;
35568 kmem_cache_t *c_index_cachep;
35569 #if SLAB_STATS
35570 unsigned long c_num_active;
35571 unsigned long c_num_allocations;
35572 unsigned long c_high_mark;
35573 unsigned long c_grown;
35574 unsigned long c_reaped;
35575 atomic_t c_errors;
35576 #endif /* SLAB_STATS */
35577 };
35578
35579 /* internal c_flags */
35580 /* slab management in own cache */
35581 #define SLAB_CFLGS_OFF_SLAB 0x010000UL
35582 /* bufctls in own cache */
35583 #define SLAB_CFLGS_BUFCTL 0x020000UL
35584 /* a general cache */
35585 #define SLAB_CFLGS_GENERAL 0x080000UL
35586
35587 /* c_dflags (dynamic flags). Need to hold the spinlock
35588 * to access this member */
35589 /* don't reap a recently grown cache */
35590 #define SLAB_CFLGS_GROWN 0x000002UL
35591
35592 #define SLAB_OFF_SLAB(x) ((x) & SLAB_CFLGS_OFF_SLAB)
35593 #define SLAB_BUFCTL(x) ((x) & SLAB_CFLGS_BUFCTL)
35594 #define SLAB_GROWN(x) ((x) & SLAB_CFLGS_GROWN)
35595
35596 #if SLAB_STATS
35597 #define SLAB_STATS_INC_ACTIVE(x) ((x)->c_num_active++)
35598 #define SLAB_STATS_DEC_ACTIVE(x) ((x)->c_num_active--)
35599 #define SLAB_STATS_INC_ALLOCED(x) \
35600 ((x)->c_num_allocations++)
35601 #define SLAB_STATS_INC_GROWN(x) ((x)->c_grown++)
35602 #define SLAB_STATS_INC_REAPED(x) ((x)->c_reaped++)
35603 #define SLAB_STATS_SET_HIGH(x) \
35604 do { \
35605 if ((x)->c_num_active > (x)->c_high_mark) \
35606 (x)->c_high_mark = (x)->c_num_active; \
35607 } while (0)
35608 #define SLAB_STATS_INC_ERR(x) \
35609 (atomic_inc(&(x)->c_errors))
35610 #else
35611 #define SLAB_STATS_INC_ACTIVE(x)
35612 #define SLAB_STATS_DEC_ACTIVE(x)
35613 #define SLAB_STATS_INC_ALLOCED(x)
35614 #define SLAB_STATS_INC_GROWN(x)
35615 #define SLAB_STATS_INC_REAPED(x)
35616 #define SLAB_STATS_SET_HIGH(x)
35617 #define SLAB_STATS_INC_ERR(x)
35618 #endif /* SLAB_STATS */
35619
35620 #if SLAB_SELFTEST
35621 #if !SLAB_DEBUG_SUPPORT
35622 #error Debug support needed for self-test
35623 #endif
35624 static void kmem_self_test(void);
35625 #endif /* SLAB_SELFTEST */
35626
35627 /* c_magic - used to detect 'out of slabs' in
35628 * __kmem_cache_alloc() */
35629 #define SLAB_C_MAGIC 0x4F17A36DUL
35630
35631 /* maximum size of an obj (in 2^order pages) */
35632 #define SLAB_OBJ_MAX_ORDER 5 /* 32 pages */
35633
35634 /* maximum num of pages for a slab (prevents large
35635 * requests to the VM layer) */
35636 #define SLAB_MAX_GFP_ORDER 5 /* 32 pages */
35637
35638 /* the 'preferred' minimum num of objs per slab - maybe
35639 * less for large objs */
35640 #define SLAB_MIN_OBJS_PER_SLAB 4
35641
35642 /* If the num of objs per slab is <=
35643 * SLAB_MIN_OBJS_PER_SLAB, then the page order must be
35644 * less than this before trying the next order. */
35645 #define SLAB_BREAK_GFP_ORDER_HI 2
35646 #define SLAB_BREAK_GFP_ORDER_LO 1
35647 static int slab_break_gfp_order =
35648 SLAB_BREAK_GFP_ORDER_LO;
35649
35650 /* Macros for storing/retrieving the cachep and or slab
35651 * from the global 'mem_map'. With off-slab bufctls,
35652 * these are used to find the slab an obj belongs to.
35653 * With kmalloc(), and kfree(), these are used to find
35654 * the cache which an obj belongs to. */
35655 #define SLAB_SET_PAGE_CACHE(pg, x) \
35656 ((pg)->next = (struct page *)(x))
35657 #define SLAB_GET_PAGE_CACHE(pg) \
35658 ((kmem_cache_t *)(pg)->next)
35659 #define SLAB_SET_PAGE_SLAB(pg, x) \
35660 ((pg)->prev = (struct page *)(x))
35661 #define SLAB_GET_PAGE_SLAB(pg) \
35662 ((kmem_slab_t *)(pg)->prev)
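/* A minimal sketch of how these macros co-operate:
 * kmem_cache_grow() stamps every page of a new slab, and
 * kfree()/__kmem_cache_free() read the stamps back to find
 * the owning cache and slab for an obj address. */
#if 0
	/* while growing - one iteration per page of the slab */
	SLAB_SET_PAGE_CACHE(page, cachep);
	SLAB_SET_PAGE_SLAB(page, slabp);
	PageSetSlab(page);

	/* while freeing an obj at address objp */
	page = &mem_map[MAP_NR(objp)];
	cachep = SLAB_GET_PAGE_CACHE(page);
	slabp = SLAB_GET_PAGE_SLAB(page);
#endif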
35663
35664 /* Size description struct for general caches. */
35665 typedef struct cache_sizes {
35666 size_t cs_size;
35667 kmem_cache_t *cs_cachep;
35668 } cache_sizes_t;
35669
35670 static cache_sizes_t cache_sizes[] = {
35671 #if PAGE_SIZE == 4096
35672 { 32, NULL},
35673 #endif
35674 { 64, NULL},
35675 { 128, NULL},
35676 { 256, NULL},
35677 { 512, NULL},
35678 {1024, NULL},
35679 {2048, NULL},
35680 {4096, NULL},
35681 {8192, NULL},
35682 {16384, NULL},
35683 {32768, NULL},
35684 {65536, NULL},
35685 {131072, NULL},
35686 {0, NULL}
35687 };
35688
35689 /* Names for the general caches. Not placed into the
35690 * sizes struct for a good reason; the string ptr is not
35691 * needed while searching in kmalloc(), and would
35692 * 'get-in-the-way' in the h/w cache. */
35693 static char *cache_sizes_name[] = {
35694 #if PAGE_SIZE == 4096
35695 "size-32",
35696 #endif
35697 "size-64",
35698 "size-128",
35699 "size-256",
35700 "size-512",
35701 "size-1024",
35702 "size-2048",
35703 "size-4096",
35704 "size-8192",
35705 "size-16384",
35706 "size-32768",
35707 "size-65536",
35708 "size-131072"
35709 };
35710
35711 /* internal cache of cache description objs */
35712 static kmem_cache_t cache_cache = {
35713 /* freep, flags */ kmem_slab_end(&cache_cache),
35714 SLAB_NO_REAP,
35715 /* offset, num */ sizeof(kmem_cache_t), 0,
35716 /* c_magic, c_inuse */ SLAB_C_MAGIC, 0,
35717 /* firstp, lastp */ kmem_slab_end(&cache_cache),
35718 kmem_slab_end(&cache_cache),
35719 /* spinlock */ SPIN_LOCK_UNLOCKED,
35720 /* growing */ 0,
35721 /* dflags */ 0,
35722 /* org_size, gfp */ 0, 0,
35723 /* ctor, dtor, align */ NULL, NULL, L1_CACHE_BYTES,
35724 /* colour, colour_next */ 0, 0,
35725 /* failures */ 0,
35726 /* name */ "kmem_cache",
35727 /* nextp */ &cache_cache,
35728 /* index */ NULL,
35729 };
35730
35731 /* Guard access to the cache-chain. */
35732 static struct semaphore cache_chain_sem;
35733
35734 /* Place maintainer for reaping. */
35735 static kmem_cache_t *clock_searchp = &cache_cache;
35736
35737 /* Internal slab management cache, for when slab
35738 * management is off-slab. */
35739 static kmem_cache_t *cache_slabp = NULL;
35740
35741 /* Max number of objs-per-slab for caches which use
35742 * bufctl's. Needed to avoid a possible looping
35743 * condition in kmem_cache_grow(). */
35744 static unsigned long bufctl_limit = 0;
35745
35746 /* Initialisation - setup the `cache' cache. */
35747 long __init kmem_cache_init(long start, long end)
35748 {
35749 size_t size, i;
35750
35751 #define kmem_slab_offset(x) \
35752 ((unsigned long)&((kmem_slab_t *)0)->x)
35753 #define kmem_slab_diff(a,b) \
35754 (kmem_slab_offset(a) - kmem_slab_offset(b))
35755 #define kmem_cache_offset(x) \
35756 ((unsigned long)&((kmem_cache_t *)0)->x)
35757 #define kmem_cache_diff(a,b) \
35758 (kmem_cache_offset(a) - kmem_cache_offset(b))
35759
35760 /* Sanity checks... */
35761 if (kmem_cache_diff(c_firstp, c_magic) !=
35762 kmem_slab_diff(s_nextp, s_magic) ||
35763 kmem_cache_diff(c_firstp, c_inuse) !=
35764 kmem_slab_diff(s_nextp, s_inuse) ||
35765 ((kmem_cache_offset(c_lastp) -
35766 ((unsigned long)
35767 kmem_slab_end((kmem_cache_t*)NULL))) !=
35768 kmem_slab_offset(s_prevp)) ||
35769 kmem_cache_diff(c_lastp, c_firstp) !=
35770 kmem_slab_diff(s_prevp, s_nextp)) {
35771 /* Offsets to the magic are incorrect, either the
35772 * structures have been incorrectly changed, or
35773 * adjustments are needed for your architecture. */
35774 panic("kmem_cache_init(): Offsets are wrong - "
35775 "I've been messed with!");
35776 /* NOTREACHED */
35777 }
35778 #undef kmem_cache_offset
35779 #undef kmem_cache_diff
35780 #undef kmem_slab_offset
35781 #undef kmem_slab_diff
35782
35783 cache_chain_sem = MUTEX;
35784
35785 size = cache_cache.c_offset + sizeof(kmem_bufctl_t);
35786 size += (L1_CACHE_BYTES-1);
35787 size &= ~(L1_CACHE_BYTES-1);
35788 cache_cache.c_offset = size-sizeof(kmem_bufctl_t);
35789
35790 i = (PAGE_SIZE << cache_cache.c_gfporder) -
35791 slab_align_size;
35792 cache_cache.c_num = i / size; /* objs / slab */
35793
35794 /* Cache colouring. */
35795 cache_cache.c_colour =
35796 (i-(cache_cache.c_num*size))/L1_CACHE_BYTES;
35797 cache_cache.c_colour_next = cache_cache.c_colour;
35798
35799 /* Fragmentation resistance on low memory - only use
35800 * bigger page orders on machines with more than 32MB
35801 * of memory. */
35802 if (num_physpages > (32 << 20) >> PAGE_SHIFT)
35803 slab_break_gfp_order = SLAB_BREAK_GFP_ORDER_HI;
35804 return start;
35805 }
35806
35807 /* Initialisation - setup remaining internal and general
35808 * caches. Called after the gfp() functions have been
35809 * enabled, and before smp_init(). */
35810 void __init kmem_cache_sizes_init(void)
35811 {
35812 unsigned int found = 0;
35813
35814 cache_slabp = kmem_cache_create("slab_cache",
35815 sizeof(kmem_slab_t), 0, SLAB_HWCACHE_ALIGN,
35816 NULL, NULL);
35817 if (cache_slabp) {
35818 char **names = cache_sizes_name;
35819 cache_sizes_t *sizes = cache_sizes;
35820 do {
35821 /* For performance, all the general caches are L1
35822 * aligned. This should be particularly beneficial
35823 * on SMP boxes, as it eliminates "false sharing".
35824 * Note for systems short on memory removing the
35825 * alignment will allow tighter packing of the
35826 * smaller caches. */
35827 if (!(sizes->cs_cachep =
35828 kmem_cache_create(*names++, sizes->cs_size,
35829 0, SLAB_HWCACHE_ALIGN, NULL, NULL)))
35830 goto panic_time;
35831 if (!found) {
35832 /* Inc off-slab bufctl limit until the ceiling is
35833 * hit. */
35834 if (SLAB_BUFCTL(sizes->cs_cachep->c_flags))
35835 found++;
35836 else
35837 bufctl_limit =
35838 (sizes->cs_size/sizeof(kmem_bufctl_t));
35839 }
35840 sizes->cs_cachep->c_flags |= SLAB_CFLGS_GENERAL;
35841 sizes++;
35842 } while (sizes->cs_size);
35843 #if SLAB_SELFTEST
35844 kmem_self_test();
35845 #endif /* SLAB_SELFTEST */
35846 return;
35847 }
35848 panic_time:
35849 panic("kmem_cache_sizes_init: Error creating caches");
35850 /* NOTREACHED */
35851 }
35852
35853 /* Interface to system's page allocator. On return,
35854 * *dma is non-zero if all of the memory is DMAable. No
35855 * need to hold the cache-lock. */
35856 static inline void *
35857 kmem_getpages(kmem_cache_t *cachep, unsigned long flags,
35858 unsigned int *dma)
35859 {
35860 void *addr;
35861
35862 *dma = flags & SLAB_DMA;
35863 addr =
35864 (void*) __get_free_pages(flags, cachep->c_gfporder);
35865 /* Assume that now we have the pages no one else can
35866 * legally mess with the 'struct page's. However
35867 * vm_scan() might try to test the structure to see if
35868 * it is a named-page or buffer-page. The members it
35869 * tests are of no interest here..... */
35870 if (!*dma && addr) {
35871 /* Need to check if can dma. */
35872 struct page *page = mem_map + MAP_NR(addr);
35873 *dma = 1 << cachep->c_gfporder;
35874 while ((*dma)--) {
35875 if (!PageDMA(page)) {
35876 *dma = 0;
35877 break;
35878 }
35879 page++;
35880 }
35881 }
35882 return addr;
35883 }
35884
35885 /* Interface to system's page release. */
35886 static inline void
35887 kmem_freepages(kmem_cache_t *cachep, void *addr)
35888 {
35889 unsigned long i = (1<<cachep->c_gfporder);
35890 struct page *page = &mem_map[MAP_NR(addr)];
35891
35892 /* free_pages() does not clear the type bit - we do
35893 * that. The pages have been unlinked from their
35894 * cache-slab, but their 'struct page's might be
35895 * accessed in vm_scan(). Shouldn't be a worry. */
35896 while (i--) {
35897 PageClearSlab(page);
35898 page++;
35899 }
35900 free_pages((unsigned long)addr, cachep->c_gfporder);
35901 }
35902
35903 #if SLAB_DEBUG_SUPPORT
35904 static inline void
35905 kmem_poison_obj(kmem_cache_t *cachep, void *addr)
35906 {
35907 memset(addr, SLAB_POISON_BYTE, cachep->c_org_size);
35908 *(unsigned char *)(addr+cachep->c_org_size-1) =
35909 SLAB_POISON_END;
35910 }
35911
35912 static inline int
35913 kmem_check_poison_obj(kmem_cache_t *cachep, void *addr)
35914 {
35915 void *end;
35916 end = memchr(addr, SLAB_POISON_END,
35917 cachep->c_org_size);
35918 if (end != (addr+cachep->c_org_size-1))
35919 return 1;
35920 return 0;
35921 }
35922 #endif /* SLAB_DEBUG_SUPPORT */
35923
35924 /* Three slab chain funcs - all called with ints disabled
35925 * and the appropriate cache-lock held. */
35926 static inline void
35927 kmem_slab_unlink(kmem_slab_t *slabp)
35928 {
35929 kmem_slab_t *prevp = slabp->s_prevp;
35930 kmem_slab_t *nextp = slabp->s_nextp;
35931 prevp->s_nextp = nextp;
35932 nextp->s_prevp = prevp;
35933 }
35934
35935 static inline void
35936 kmem_slab_link_end(kmem_cache_t *cachep,
35937 kmem_slab_t *slabp)
35938 {
35939 kmem_slab_t *lastp = cachep->c_lastp;
35940 slabp->s_nextp = kmem_slab_end(cachep);
35941 slabp->s_prevp = lastp;
35942 cachep->c_lastp = slabp;
35943 lastp->s_nextp = slabp;
35944 }
35945
35946 static inline void
35947 kmem_slab_link_free(kmem_cache_t *cachep,
35948 kmem_slab_t *slabp)
35949 {
35950 kmem_slab_t *nextp = cachep->c_freep;
35951 kmem_slab_t *prevp = nextp->s_prevp;
35952 slabp->s_nextp = nextp;
35953 slabp->s_prevp = prevp;
35954 nextp->s_prevp = slabp;
35955 slabp->s_prevp->s_nextp = slabp;
35956 }
35957
35958 /* Destroy all the objs in a slab, and release the mem
35959 * back to the system. Before calling, the slab must have
35960 * been unlinked from the cache. The cache-lock is not
35961 * held/needed. */
35962 static void
35963 kmem_slab_destroy(kmem_cache_t *cachep,
35964 kmem_slab_t *slabp)
35965 {
35966 if (cachep->c_dtor
35967 #if SLAB_DEBUG_SUPPORT
35968 || cachep->c_flags & (SLAB_POISON | SLAB_RED_ZONE)
35969 #endif /*SLAB_DEBUG_SUPPORT*/
35970 ) {
35971 /* Doesn't use the bufctl ptrs to find objs. */
35972 unsigned long num = cachep->c_num;
35973 void *objp = slabp->s_mem;
35974 do {
35975 #if SLAB_DEBUG_SUPPORT
35976 if (cachep->c_flags & SLAB_RED_ZONE) {
35977 if (*((unsigned long*)(objp)) != SLAB_RED_MAGIC1)
35978 printk(KERN_ERR "kmem_slab_destroy: "
35979 "Bad front redzone - %s\n",
35980 cachep->c_name);
35981 objp += BYTES_PER_WORD;
35982 if (*((unsigned long*)(objp+cachep->c_org_size))
35983 != SLAB_RED_MAGIC1)
35984 printk(KERN_ERR "kmem_slab_destroy: "
35985 "Bad rear redzone - %s\n",
35986 cachep->c_name);
35987 }
35988 if (cachep->c_dtor)
35989 #endif /*SLAB_DEBUG_SUPPORT*/
35990 (cachep->c_dtor)(objp, cachep, 0);
35991 #if SLAB_DEBUG_SUPPORT
35992 else if (cachep->c_flags & SLAB_POISON) {
35993 if (kmem_check_poison_obj(cachep, objp))
35994 printk(KERN_ERR "kmem_slab_destroy: "
35995 "Bad poison - %s\n", cachep->c_name);
35996 }
35997 if (cachep->c_flags & SLAB_RED_ZONE)
35998 objp -= BYTES_PER_WORD;
35999 #endif /* SLAB_DEBUG_SUPPORT */
36000 objp += cachep->c_offset;
36001 if (!slabp->s_index)
36002 objp += sizeof(kmem_bufctl_t);
36003 } while (--num);
36004 }
36005
36006 slabp->s_magic = SLAB_MAGIC_DESTROYED;
36007 if (slabp->s_index)
36008 kmem_cache_free(cachep->c_index_cachep,
36009 slabp->s_index);
36010 kmem_freepages(cachep, slabp->s_mem-slabp->s_offset);
36011 if (SLAB_OFF_SLAB(cachep->c_flags))
36012 kmem_cache_free(cache_slabp, slabp);
36013 }
36014
36015 /* Calculate the num of objs, wastage, and bytes left
36016 * over for a given slab size. */
36017 static inline size_t
36018 kmem_cache_cal_waste(unsigned long gfporder, size_t size,
36019 size_t extra, unsigned long flags, size_t *left_over,
36020 unsigned long *num)
36021 {
36022 size_t wastage = PAGE_SIZE<<gfporder;
36023
36024 if (SLAB_OFF_SLAB(flags))
36025 gfporder = 0;
36026 else
36027 gfporder = slab_align_size;
36028 wastage -= gfporder;
36029 *num = wastage / size;
36030 wastage -= (*num * size);
36031 *left_over = wastage;
36032
36033 return (wastage + gfporder + (extra * *num));
36034 }
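/* A worked example (assuming a 32bit arch, PAGE_SIZE 4096,
 * L1_CACHE_BYTES 32 and a 32-byte kmem_slab_t, so
 * slab_align_size == 32): for an on-slab cache with
 * gfporder 0, size 128 (obj plus its on-slab bufctl) and
 * extra == sizeof(kmem_bufctl_t) == 4:
 *
 *	usable     = 4096 - 32      = 4064 bytes
 *	*num       = 4064 / 128     = 31 objs
 *	*left_over = 4064 - 31*128  = 96 bytes
 *	returned   = 96 + 32 + 4*31 = 252 bytes of overhead
 */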
36035
36036 /* Create a cache: Returns a ptr to the cache on success,
36037 * NULL on failure. Cannot be called within an int, but
36038 * can be interrupted. NOTE: The 'name' is assumed to be
36039 * memory that is _not_ going to disappear. */
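/* A minimal usage sketch (the 'foo' names below are
 * hypothetical): a sub-system typically creates its cache
 * once at init time, then allocs/frees objs from it. */
#if 0
struct foo { int a; int b; };
static kmem_cache_t *foo_cachep;

void foo_init(void)
{
	foo_cachep = kmem_cache_create("foo_cache",
			sizeof(struct foo), 0,
			SLAB_HWCACHE_ALIGN, NULL, NULL);
}

struct foo *foo_get(int flags)
{
	return kmem_cache_alloc(foo_cachep, flags);
}

void foo_put(struct foo *fp)
{
	kmem_cache_free(foo_cachep, fp);
}
#endif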
36040 kmem_cache_t *
36041 kmem_cache_create(const char *name, size_t size,
36042 size_t offset, unsigned long flags,
36043 void (*ctor)(void*, kmem_cache_t *, unsigned long),
36044 void (*dtor)(void*, kmem_cache_t *, unsigned long))
36045 {
36046 const char *func_nm= KERN_ERR "kmem_create: ";
36047 kmem_cache_t *searchp;
36048 kmem_cache_t *cachep=NULL;
36049 size_t extra;
36050 size_t left_over;
36051 size_t align;
36052
36053 /* Sanity checks... */
36054 #if SLAB_MGMT_CHECKS
36055 if (!name) {
36056 printk("%sNULL ptr\n", func_nm);
36057 goto opps;
36058 }
36059 if (in_interrupt()) {
36060 printk("%sCalled during int - %s\n", func_nm, name);
36061 goto opps;
36062 }
36063
36064 if (size < BYTES_PER_WORD) {
36065 printk("%sSize too small %d - %s\n",
36066 func_nm, (int) size, name);
36067 size = BYTES_PER_WORD;
36068 }
36069
36070 if (size > ((1<<SLAB_OBJ_MAX_ORDER)*PAGE_SIZE)) {
36071 printk("%sSize too large %d - %s\n",
36072 func_nm, (int) size, name);
36073 goto opps;
36074 }
36075
36076 if (dtor && !ctor) {
36077 /* Decon, but no con - doesn't make sense */
36078 printk("%sDecon but no con - %s\n", func_nm, name);
36079 goto opps;
36080 }
36081
36082 if (offset < 0 || offset > size) {
36083 printk("%sOffset weird %d - %s\n",
36084 func_nm, (int) offset, name);
36085 offset = 0;
36086 }
36087
36088 #if SLAB_DEBUG_SUPPORT
36089 if ((flags & SLAB_DEBUG_INITIAL) && !ctor) {
36090 /* No ctor, but initial state check requested */
36091 printk("%sNo con, but init state check requested - "
36092 "%s\n", func_nm, name);
36093 flags &= ~SLAB_DEBUG_INITIAL;
36094 }
36095
36096 if ((flags & SLAB_POISON) && ctor) {
36097 /* request for poisoning, but we can't do that with a
36098 * constructor */
36099 printk("%sPoisoning requested, but con given - %s\n",
36100 func_nm, name);
36101 flags &= ~SLAB_POISON;
36102 }
36103 #if 0
36104 if ((flags & SLAB_HIGH_PACK) && ctor) {
36105 printk("%sHigh pack requested, but con given - %s\n",
36106 func_nm, name);
36107 flags &= ~SLAB_HIGH_PACK;
36108 }
36109 if ((flags & SLAB_HIGH_PACK) &&
36110 (flags & (SLAB_POISON|SLAB_RED_ZONE))) {
36111 printk("%sHigh pack requested, but with "
36112 "poisoning/red-zoning - %s\n",
36113 func_nm, name);
36114 flags &= ~SLAB_HIGH_PACK;
36115 }
36116 #endif
36117 #endif /* SLAB_DEBUG_SUPPORT */
36118 #endif /* SLAB_MGMT_CHECKS */
36119
36120 /* Always check the flags; a caller might be expecting
36121 * debug support which isn't available. */
36122 if (flags & ~SLAB_C_MASK) {
36123 printk("%sIllgl flg %lX - %s\n",
36124 func_nm, flags, name);
36125 flags &= SLAB_C_MASK;
36126 }
36127
36128 /* Get cache's description obj. */
36129 cachep =
36130 (kmem_cache_t *) kmem_cache_alloc(&cache_cache,
36131 SLAB_KERNEL);
36132 if (!cachep)
36133 goto opps;
36134 memset(cachep, 0, sizeof(kmem_cache_t));
36135
36136 /* Check that size is in terms of words. This is
36137 * needed to avoid unaligned accesses for some archs
36138 * when redzoning is used, and makes sure any on-slab
36139 * bufctl's are also correctly aligned. */
36140 if (size & (BYTES_PER_WORD-1)) {
36141 size += (BYTES_PER_WORD-1);
36142 size &= ~(BYTES_PER_WORD-1);
36143 printk("%sForcing size word alignment - %s\n",
36144 func_nm, name);
36145 }
36146
36147 cachep->c_org_size = size;
36148 #if SLAB_DEBUG_SUPPORT
36149 if (flags & SLAB_RED_ZONE) {
36150 /* There is no point trying to honour cache alignment
36151 * when redzoning. */
36152 flags &= ~SLAB_HWCACHE_ALIGN;
36153 size += 2*BYTES_PER_WORD; /* words for redzone */
36154 }
36155 #endif /* SLAB_DEBUG_SUPPORT */
36156
36157 align = BYTES_PER_WORD;
36158 if (flags & SLAB_HWCACHE_ALIGN)
36159 align = L1_CACHE_BYTES;
36160
36161 /* Determine if the slab management and/or bufctls are
36162 * 'on' or 'off' slab. */
36163 extra = sizeof(kmem_bufctl_t);
36164 if (size < (PAGE_SIZE>>3)) {
36165 /* Size is small(ish). Use packing where bufctl size
36166 * per obj is low, and slab management is on-slab. */
36167 #if 0
36168 if ((flags & SLAB_HIGH_PACK)) {
36169 /* Special high packing for small objects (mainly
36170 * for vm_mapping structs, but others can use it).
36171 */
36172 if (size == (L1_CACHE_BYTES/4) ||
36173 size == (L1_CACHE_BYTES/2) ||
36174 size == L1_CACHE_BYTES) {
36175 /* The bufctl is stored with the object. */
36176 extra = 0;
36177 } else
36178 flags &= ~SLAB_HIGH_PACK;
36179 }
36180 #endif
36181 } else {
36182 /* Size is large, assume best to place the slab
36183 * management obj off-slab (should allow better
36184 * packing of objs). */
36185 flags |= SLAB_CFLGS_OFF_SLAB;
36186 if (!(size & ~PAGE_MASK) || size == (PAGE_SIZE/2) ||
36187 size == (PAGE_SIZE/4) || size == (PAGE_SIZE/8)) {
36188 /* To avoid waste the bufctls are off-slab... */
36189 flags |= SLAB_CFLGS_BUFCTL;
36190 extra = 0;
36191 } /* else slab management is off-slab, but freelist
36192 * pointers are on. */
36193 }
36194 size += extra;
36195
36196 if (flags & SLAB_HWCACHE_ALIGN) {
36197 /* Need to adjust size so that objs are cache
36198 * aligned. */
36199 if (size > (L1_CACHE_BYTES/2)) {
36200 size_t words = size % L1_CACHE_BYTES;
36201 if (words)
36202 size += (L1_CACHE_BYTES-words);
36203 } else {
36204 /* Small obj size, can get at least two per cache
36205 * line. */
36206 int num_per_line = L1_CACHE_BYTES/size;
36207 left_over = L1_CACHE_BYTES - (num_per_line*size);
36208 if (left_over) {
36209 /* Need to adjust size so objs cache align. */
36210 if (left_over%num_per_line) {
36211 /* Odd num of objs per line - fixup. */
36212 num_per_line--;
36213 left_over += size;
36214 }
36215 size += (left_over/num_per_line);
36216 }
36217 }
36218 } else if (!(size%L1_CACHE_BYTES)) {
36219 /* Size happens to cache align... */
36220 flags |= SLAB_HWCACHE_ALIGN;
36221 align = L1_CACHE_BYTES;
36222 }
36223
36224 /* Calculate the size (in pages) of slabs, and the num of objs
36225 * per slab. This could be made much more intelligent.
36226 * For now, try to avoid using high page-orders for
36227 * slabs. When the gfp() funcs are more friendly
36228 * towards high-order requests, this should be changed.
36229 */
36230 do {
36231 size_t wastage;
36232 unsigned int break_flag = 0;
36233 cal_wastage:
36234 wastage = kmem_cache_cal_waste(cachep->c_gfporder,
36235 size, extra, flags, &left_over, &cachep->c_num);
36236 if (!cachep->c_num)
36237 goto next;
36238 if (break_flag)
36239 break;
36240 if (SLAB_BUFCTL(flags) &&
36241 cachep->c_num > bufctl_limit) {
36242 /* Oops, this num of objs will cause problems. */
36243 cachep->c_gfporder--;
36244 break_flag++;
36245 goto cal_wastage;
36246 }
36247 if (cachep->c_gfporder == SLAB_MAX_GFP_ORDER)
36248 break;
36249
36250 /* Large num of objs is good, but v. large slabs are
36251 * currently bad for the gfp()s. */
36252 if (cachep->c_num <= SLAB_MIN_OBJS_PER_SLAB) {
36253 if (cachep->c_gfporder < slab_break_gfp_order)
36254 goto next;
36255 }
36256
36257 /* Stop caches with small objs having a large num of
36258 * pages. */
36259 if (left_over <= slab_align_size)
36260 break;
36261 if ((wastage*8) <= (PAGE_SIZE<<cachep->c_gfporder))
36262 break; /* Acceptable internal fragmentation. */
36263 next:
36264 cachep->c_gfporder++;
36265 } while (1);
36266
36267 /* If the slab management has been placed off-slab,
36268 * and we have enough space, then move it on-slab. This
36269 * is at the expense of any extra colouring. */
36270 if ((flags & SLAB_CFLGS_OFF_SLAB) &&
36271 !SLAB_BUFCTL(flags) &&
36272 left_over >= slab_align_size) {
36273 flags &= ~SLAB_CFLGS_OFF_SLAB;
36274 left_over -= slab_align_size;
36275 }
36276
36277 /* Offset must be a multiple of the alignment. */
36278 offset += (align-1);
36279 offset &= ~(align-1);
36280
36281 /* Mess around with the offset alignment. */
36282 if (!left_over) {
36283 offset = 0;
36284 } else if (left_over < offset) {
36285 offset = align;
36286 if (flags & SLAB_HWCACHE_ALIGN) {
36287 if (left_over < offset)
36288 offset = 0;
36289 } else {
36290 /* Offset is BYTES_PER_WORD, and left_over is at
36291 * least BYTES_PER_WORD.
36292 */
36293 if (left_over >= (BYTES_PER_WORD*2)) {
36294 offset >>= 1;
36295 if (left_over >= (BYTES_PER_WORD*4))
36296 offset >>= 1;
36297 }
36298 }
36299 } else if (!offset) {
36300 /* No offset requested, but space enough - give
36301 * one. */
36302 offset = left_over/align;
36303 if (flags & SLAB_HWCACHE_ALIGN) {
36304 if (offset >= 8) {
36305 /* A large number of colours - use a larger
36306 * alignment. */
36307 align <<= 1;
36308 }
36309 } else {
36310 if (offset >= 10) {
36311 align <<= 1;
36312 if (offset >= 16)
36313 align <<= 1;
36314 }
36315 }
36316 offset = align;
36317 }
36318
36319 #if 0
36320 printk("%s: Left_over:%d Align:%d Size:%d\n",
36321 name, left_over, offset, size);
36322 #endif
36323
36324 if ((cachep->c_align = (unsigned long) offset))
36325 cachep->c_colour = (left_over/offset);
36326 cachep->c_colour_next = cachep->c_colour;
36327
36328 /* If the bufctl's are on-slab, c_offset does not
36329 * include the size of bufctl. */
36330 if (!SLAB_BUFCTL(flags))
36331 size -= sizeof(kmem_bufctl_t);
36332 else
36333 cachep->c_index_cachep =
36334 kmem_find_general_cachep(cachep->c_num *
36335 sizeof(kmem_bufctl_t));
36336 cachep->c_offset = (unsigned long) size;
36337 cachep->c_freep = kmem_slab_end(cachep);
36338 cachep->c_firstp = kmem_slab_end(cachep);
36339 cachep->c_lastp = kmem_slab_end(cachep);
36340 cachep->c_flags = flags;
36341 cachep->c_ctor = ctor;
36342 cachep->c_dtor = dtor;
36343 cachep->c_magic = SLAB_C_MAGIC;
36344 cachep->c_name = name; /* Simply point to the name. */
36345 spin_lock_init(&cachep->c_spinlock);
36346
36347 /* Need the semaphore to access the chain. */
36348 down(&cache_chain_sem);
36349 searchp = &cache_cache;
36350 do {
36351 /* The name field is constant - no lock needed. */
36352 if (!strcmp(searchp->c_name, name)) {
36353 printk("%sDup name - %s\n", func_nm, name);
36354 break;
36355 }
36356 searchp = searchp->c_nextp;
36357 } while (searchp != &cache_cache);
36358
36359 /* There is no reason to lock our new cache before we
36360 * link it in - no one knows about it yet...
36361 */
36362 cachep->c_nextp = cache_cache.c_nextp;
36363 cache_cache.c_nextp = cachep;
36364 up(&cache_chain_sem);
36365 opps:
36366 return cachep;
36367 }
36368
36369 /* Shrink a cache. Releases as many slabs as possible
36370 * for a cache. It is expected this function will be
36371 * called by a module when it is unloaded. The cache is
36372 * _not_ removed, this creates too many problems and the
36373 * cache-structure does not take up much room. A module
36374 * should keep its cache pointer(s) in unloaded memory,
36375 * so when reloaded it knows the cache is available. To
36376 * help debugging, a zero exit status indicates all slabs
36377 * were released. */
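/* A minimal usage sketch (the 'foo' names are
 * hypothetical): a module releases its slabs on unload but
 * leaves the cache itself in place for a later reload. */
#if 0
void cleanup_module(void)
{
	/* A zero return means every slab was released. */
	if (kmem_cache_shrink(foo_cachep))
		printk(KERN_INFO
		       "foo: cache not empty on unload\n");
}
#endif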
36378 int
36379 kmem_cache_shrink(kmem_cache_t *cachep)
36380 {
36381 kmem_cache_t *searchp;
36382 kmem_slab_t *slabp;
36383 int ret;
36384
36385 if (!cachep) {
36386 printk(KERN_ERR "kmem_shrink: NULL ptr\n");
36387 return 2;
36388 }
36389 if (in_interrupt()) {
36390 printk(KERN_ERR "kmem_shrink: Called during int - "
36391 "%s\n", cachep->c_name);
36392 return 2;
36393 }
36394
36395 /* Find the cache in the chain of caches. */
36396 down(&cache_chain_sem); /* Semaphore is needed. */
36397 searchp = &cache_cache;
36398 for (;searchp->c_nextp != &cache_cache;
36399 searchp = searchp->c_nextp) {
36400 if (searchp->c_nextp != cachep)
36401 continue;
36402
36403 /* Accessing clock_searchp is safe - we hold the
36404 * mutex. */
36405 if (cachep == clock_searchp)
36406 clock_searchp = cachep->c_nextp;
36407 goto found;
36408 }
36409 up(&cache_chain_sem);
36410 printk(KERN_ERR "kmem_shrink: Invalid cache addr %p\n",
36411 cachep);
36412 return 2;
36413 found:
36414 /* Release the semaphore before getting the cache-lock.
36415 * This could mean multiple engines are shrinking the
36416 * cache, but so what. */
36417 up(&cache_chain_sem);
36418 spin_lock_irq(&cachep->c_spinlock);
36419
36420 /* If the cache is growing, stop shrinking. */
36421 while (!cachep->c_growing) {
36422 slabp = cachep->c_lastp;
36423 if (slabp->s_inuse || slabp == kmem_slab_end(cachep))
36424 break;
36425 kmem_slab_unlink(slabp);
36426 spin_unlock_irq(&cachep->c_spinlock);
36427 kmem_slab_destroy(cachep, slabp);
36428 spin_lock_irq(&cachep->c_spinlock);
36429 }
36430 ret = 1;
36431 if (cachep->c_lastp == kmem_slab_end(cachep))
36432 ret--; /* Cache is empty. */
36433 spin_unlock_irq(&cachep->c_spinlock);
36434 return ret;
36435 }
36436
36437 /* Get the memory for a slab management obj. */
36438 static inline kmem_slab_t *
36439 kmem_cache_slabmgmt(kmem_cache_t *cachep, void *objp,
36440 int local_flags)
36441 {
36442 kmem_slab_t *slabp;
36443
36444 if (SLAB_OFF_SLAB(cachep->c_flags)) {
36445 /* Slab management obj is off-slab. */
36446 slabp = kmem_cache_alloc(cache_slabp, local_flags);
36447 } else {
36448 /* Slab management at end of slab memory, placed so
36449 * that the position is 'coloured'. */
36450 void *end;
36451 end = objp + (cachep->c_num * cachep->c_offset);
36452 if (!SLAB_BUFCTL(cachep->c_flags))
36453 end += (cachep->c_num * sizeof(kmem_bufctl_t));
36454 slabp =
36455 (kmem_slab_t *) L1_CACHE_ALIGN((unsigned long)end);
36456 }
36457
36458 if (slabp) {
36459 slabp->s_inuse = 0;
36460 slabp->s_dma = 0;
36461 slabp->s_index = NULL;
36462 }
36463
36464 return slabp;
36465 }
36466
36467 static inline void
36468 kmem_cache_init_objs(kmem_cache_t * cachep,
36469 kmem_slab_t * slabp, void *objp,
36470 unsigned long ctor_flags)
36471 {
36472 kmem_bufctl_t **bufpp = &slabp->s_freep;
36473 unsigned long num = cachep->c_num-1;
36474
36475 do {
36476 #if SLAB_DEBUG_SUPPORT
36477 if (cachep->c_flags & SLAB_RED_ZONE) {
36478 *((unsigned long*)(objp)) = SLAB_RED_MAGIC1;
36479 objp += BYTES_PER_WORD;
36480 *((unsigned long*)(objp+cachep->c_org_size)) =
36481 SLAB_RED_MAGIC1;
36482 }
36483 #endif /* SLAB_DEBUG_SUPPORT */
36484
36485 /* Constructors are not allowed to allocate memory
36486 * from the same cache which they are a constructor
36487 * for. Otherwise, deadlock. They must also be
36488 * threaded. */
36489 if (cachep->c_ctor)
36490 cachep->c_ctor(objp, cachep, ctor_flags);
36491 #if SLAB_DEBUG_SUPPORT
36492 else if (cachep->c_flags & SLAB_POISON) {
36493 /* need to poison the objs */
36494 kmem_poison_obj(cachep, objp);
36495 }
36496
36497 if (cachep->c_flags & SLAB_RED_ZONE) {
36498 if (*((unsigned long*)(objp+cachep->c_org_size)) !=
36499 SLAB_RED_MAGIC1) {
36500 *((unsigned long*)(objp+cachep->c_org_size)) =
36501 SLAB_RED_MAGIC1;
36502 printk(KERN_ERR
36503 "kmem_init_obj: Bad rear redzone "
36504 "after constructor - %s\n",
36505 cachep->c_name);
36506 }
36507 objp -= BYTES_PER_WORD;
36508 if (*((unsigned long*)(objp)) != SLAB_RED_MAGIC1) {
36509 *((unsigned long*)(objp)) = SLAB_RED_MAGIC1;
36510 printk(KERN_ERR
36511 "kmem_init_obj: Bad front redzone "
36512 "after constructor - %s\n",
36513 cachep->c_name);
36514 }
36515 }
36516 #endif /* SLAB_DEBUG_SUPPORT */
36517
36518 objp += cachep->c_offset;
36519 if (!slabp->s_index) {
36520 *bufpp = objp;
36521 objp += sizeof(kmem_bufctl_t);
36522 } else
36523 *bufpp = &slabp->s_index[num];
36524 bufpp = &(*bufpp)->buf_nextp;
36525 } while (num--);
36526
36527 *bufpp = NULL;
36528 }
36529
36530 /* Grow (by 1) the number of slabs within a cache. This
36531 * is called by kmem_cache_alloc() when there are no
36532 * active objs left in a cache. */
36533 static int
36534 kmem_cache_grow(kmem_cache_t * cachep, int flags)
36535 {
36536 kmem_slab_t *slabp;
36537 struct page *page;
36538 void *objp;
36539 size_t offset;
36540 unsigned int dma, local_flags;
36541 unsigned long ctor_flags;
36542 unsigned long save_flags;
36543
36544 /* Be lazy and only check for valid flags here, keeping
36545 * it out of the critical path in kmem_cache_alloc().
36546 */
36547 if (flags & ~(SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW)) {
36548 printk(KERN_WARNING "kmem_grow: Illegal flgs %X "
36549 "(correcting) - %s\n", flags, cachep->c_name);
36550 flags &= (SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW);
36551 }
36552
36553 if (flags & SLAB_NO_GROW)
36554 return 0;
36555
36556 /* The test for missing atomic flag is performed here,
36557 * rather than the more obvious place, simply to reduce
36558 * the critical path length in kmem_cache_alloc(). If
36559 * a caller is slightly mis-behaving they will
36560 * eventually be caught here (where it matters). */
36561 if (in_interrupt() &&
36562 (flags & SLAB_LEVEL_MASK) != SLAB_ATOMIC) {
36563 printk(KERN_ERR "kmem_grow: Called nonatomically "
36564 "from int - %s\n", cachep->c_name);
36565 flags &= ~SLAB_LEVEL_MASK;
36566 flags |= SLAB_ATOMIC;
36567 }
36568 ctor_flags = SLAB_CTOR_CONSTRUCTOR;
36569 local_flags = (flags & SLAB_LEVEL_MASK);
36570 if (local_flags == SLAB_ATOMIC) {
36571 /* Not allowed to sleep. Need to tell a constructor
36572 * about this - it might need to know... */
36573 ctor_flags |= SLAB_CTOR_ATOMIC;
36574 }
36575
36576 /* About to mess with non-constant members - lock. */
36577 spin_lock_irqsave(&cachep->c_spinlock, save_flags);
36578
36579 /* Get colour for the slab, and calculate the next value. */
36580 if (!(offset = cachep->c_colour_next--))
36581 cachep->c_colour_next = cachep->c_colour;
36582 offset *= cachep->c_align;
36583 cachep->c_dflags = SLAB_CFLGS_GROWN;
36584
36585 cachep->c_growing++;
36586 spin_unlock_irqrestore(&cachep->c_spinlock,
36587 save_flags);
36588
36589 /* A series of memory allocations for a new slab.
36590 * Neither the cache-chain semaphore nor the cache-lock
36591 * is held, but the incremented c_growing prevents
36592 * this cache from being reaped or shrunk. Note:
36593 * The cache could be selected for reaping in
36594 * kmem_cache_reap(), but when the final test is made
36595 * the growing value will be seen. */
36596
36597 /* Get mem for the objs. */
36598 if (!(objp = kmem_getpages(cachep, flags, &dma)))
36599 goto failed;
36600
36601 /* Get slab management. */
36602 if (!(slabp = kmem_cache_slabmgmt(cachep,
36603 objp+offset,
36604 local_flags)))
36605 goto opps1;
36606 if (dma)
36607 slabp->s_dma = 1;
36608 if (SLAB_BUFCTL(cachep->c_flags)) {
36609 slabp->s_index =
36610 kmem_cache_alloc(cachep->c_index_cachep,
36611 local_flags);
36612 if (!slabp->s_index)
36613 goto opps2;
36614 }
36615
36616 /* Nasty!!!!!! I hope this is OK. */
36617 dma = 1 << cachep->c_gfporder;
36618 page = &mem_map[MAP_NR(objp)];
36619 do {
36620 SLAB_SET_PAGE_CACHE(page, cachep);
36621 SLAB_SET_PAGE_SLAB(page, slabp);
36622 PageSetSlab(page);
36623 page++;
36624 } while (--dma);
36625
36626 slabp->s_offset = offset; /* It will fit... */
36627 objp += offset; /* Address of first object. */
36628 slabp->s_mem = objp;
36629
36630 /* For on-slab bufctls, c_offset is the distance
36631 * between the start of an obj and its related bufctl.
36632 * For off-slab bufctls, c_offset is the distance
36633 * between objs in the slab. */
36634 kmem_cache_init_objs(cachep, slabp, objp, ctor_flags);
36635
36636 spin_lock_irq(&cachep->c_spinlock);
36637
36638 /* Make slab active. */
36639 slabp->s_magic = SLAB_MAGIC_ALLOC;
36640 kmem_slab_link_end(cachep, slabp);
36641 if (cachep->c_freep == kmem_slab_end(cachep))
36642 cachep->c_freep = slabp;
36643 SLAB_STATS_INC_GROWN(cachep);
36644 cachep->c_failures = 0;
36645 cachep->c_growing--;
36646
36647 spin_unlock_irqrestore(&cachep->c_spinlock,
36648 save_flags);
36649 return 1;
36650 opps2:
36651 if (SLAB_OFF_SLAB(cachep->c_flags))
36652 kmem_cache_free(cache_slabp, slabp);
36653 opps1:
36654 kmem_freepages(cachep, objp);
36655 failed:
36656 spin_lock_irq(&cachep->c_spinlock);
36657 cachep->c_growing--;
36658 spin_unlock_irqrestore(&cachep->c_spinlock,
36659 save_flags);
36660 return 0;
36661 }
36662
36663 static void
36664 kmem_report_alloc_err(const char *str,
36665 kmem_cache_t * cachep)
36666 {
36667 if (cachep)
36668 SLAB_STATS_INC_ERR(cachep); /* this is atomic */
36669 printk(KERN_ERR "kmem_alloc: %s (name=%s)\n",
36670 str, cachep ? cachep->c_name : "unknown");
36671 }
36672
36673 static void
36674 kmem_report_free_err(const char *str, const void *objp,
36675 kmem_cache_t * cachep)
36676 {
36677 if (cachep)
36678 SLAB_STATS_INC_ERR(cachep);
36679 printk(KERN_ERR "kmem_free: %s (objp=%p, name=%s)\n",
36680 str, objp, cachep ? cachep->c_name : "unknown");
36681 }
36682
36683 /* Search for a slab whose objs are suitable for DMA.
36684 * Note: since testing the first free slab (in
36685 * __kmem_cache_alloc()), ints must not have been
36686 * enabled, or the cache-lock released! */
36687 static inline kmem_slab_t *
36688 kmem_cache_search_dma(kmem_cache_t * cachep)
36689 {
36690 kmem_slab_t *slabp = cachep->c_freep->s_nextp;
36691
36692 for (; slabp != kmem_slab_end(cachep);
36693 slabp = slabp->s_nextp) {
36694 if (!(slabp->s_dma))
36695 continue;
36696 kmem_slab_unlink(slabp);
36697 kmem_slab_link_free(cachep, slabp);
36698 cachep->c_freep = slabp;
36699 break;
36700 }
36701 return slabp;
36702 }
36703
36704 #if SLAB_DEBUG_SUPPORT
36705 /* Perform extra freeing checks. Currently, this check
36706 * is only for caches that use bufctl structures within
36707 * the slab. Those which use bufctl's from the internal
36708 * cache have a reasonable check when the address is
36709 * searched for. Called with the cache-lock held. */
36710 static void *
36711 kmem_extra_free_checks(kmem_cache_t * cachep,
36712 kmem_bufctl_t *search_bufp,
36713 kmem_bufctl_t *bufp, void * objp)
36714 {
36715 if (SLAB_BUFCTL(cachep->c_flags))
36716 return objp;
36717
36718 /* Check slab's freelist to see if this obj is
36719 * there. */
36720 for (; search_bufp;
36721 search_bufp = search_bufp->buf_nextp) {
36722 if (search_bufp != bufp)
36723 continue;
36724 return NULL;
36725 }
36726 return objp;
36727 }
36728 #endif /* SLAB_DEBUG_SUPPORT */
36729
36730 /* Called with cache lock held. */
36731 static inline void
36732 kmem_cache_full_free(kmem_cache_t *cachep,
36733 kmem_slab_t *slabp)
36734 {
36735 if (slabp->s_nextp->s_inuse) {
36736 /* Not at correct position. */
36737 if (cachep->c_freep == slabp)
36738 cachep->c_freep = slabp->s_nextp;
36739 kmem_slab_unlink(slabp);
36740 kmem_slab_link_end(cachep, slabp);
36741 }
36742 }
36743
36744 /* Called with cache lock held. */
36745 static inline void
36746 kmem_cache_one_free(kmem_cache_t *cachep,
36747 kmem_slab_t *slabp)
36748 {
36749 if (slabp->s_nextp->s_inuse == cachep->c_num) {
36750 kmem_slab_unlink(slabp);
36751 kmem_slab_link_free(cachep, slabp);
36752 }
36753 cachep->c_freep = slabp;
36754 }
36755
36756 /* Returns a ptr to an obj in the given cache. */
36757 static inline void *
36758 __kmem_cache_alloc(kmem_cache_t *cachep, int flags)
36759 {
36760 kmem_slab_t *slabp;
36761 kmem_bufctl_t *bufp;
36762 void *objp;
36763 unsigned long save_flags;
36764
36765 /* Sanity check. */
36766 if (!cachep)
36767 goto nul_ptr;
36768 spin_lock_irqsave(&cachep->c_spinlock, save_flags);
36769 try_again:
36770 /* Get the slab the alloc is to come from. */
36771 slabp = cachep->c_freep;
36772
36773 /* Magic is a sanity check _and_ says if we need a new
36774 * slab. */
36775 if (slabp->s_magic != SLAB_MAGIC_ALLOC)
36776 goto alloc_new_slab;
36777 /* DMA requests are 'rare' - keep out of the critical
36778 * path. */
36779 if (flags & SLAB_DMA)
36780 goto search_dma;
36781 try_again_dma:
36782 SLAB_STATS_INC_ALLOCED(cachep);
36783 SLAB_STATS_INC_ACTIVE(cachep);
36784 SLAB_STATS_SET_HIGH(cachep);
36785 slabp->s_inuse++;
36786 bufp = slabp->s_freep;
36787 slabp->s_freep = bufp->buf_nextp;
36788 if (slabp->s_freep) {
36789 ret_obj:
36790 if (!slabp->s_index) {
36791 bufp->buf_slabp = slabp;
36792 objp = ((void*)bufp) - cachep->c_offset;
36793 finished:
36794 /* The lock is not needed by the red-zone or poison
36795 * ops, and the obj has been removed from the slab.
36796 * Should be safe to drop the lock here. */
36797 spin_unlock_irqrestore(&cachep->c_spinlock,
36798 save_flags);
36799 #if SLAB_DEBUG_SUPPORT
36800 if (cachep->c_flags & SLAB_RED_ZONE)
36801 goto red_zone;
36802 ret_red:
36803 if ((cachep->c_flags & SLAB_POISON) &&
36804 kmem_check_poison_obj(cachep, objp))
36805 kmem_report_alloc_err("Bad poison", cachep);
36806 #endif /* SLAB_DEBUG_SUPPORT */
36807 return objp;
36808 }
36809 /* Update index ptr. */
36810 objp = ((bufp-slabp->s_index)*cachep->c_offset) +
36811 slabp->s_mem;
36812 bufp->buf_objp = objp;
36813 goto finished;
36814 }
36815 cachep->c_freep = slabp->s_nextp;
36816 goto ret_obj;
36817
36818 #if SLAB_DEBUG_SUPPORT
36819 red_zone:
36820 /* Set alloc red-zone, and check old one. */
36821 if (xchg((unsigned long *)objp, SLAB_RED_MAGIC2)
36822 != SLAB_RED_MAGIC1)
36823 kmem_report_alloc_err("Bad front redzone", cachep);
36824 objp += BYTES_PER_WORD;
36825 if (xchg((unsigned long *)(objp+cachep->c_org_size),
36826 SLAB_RED_MAGIC2) != SLAB_RED_MAGIC1)
36827 kmem_report_alloc_err("Bad rear redzone", cachep);
36828 goto ret_red;
36829 #endif /* SLAB_DEBUG_SUPPORT */
36830
36831 search_dma:
36832 if (slabp->s_dma ||
36833 (slabp = kmem_cache_search_dma(cachep)) !=
36834 kmem_slab_end(cachep))
36835 goto try_again_dma;
36836 alloc_new_slab:
36837 /* Either out of slabs, or magic number corruption. */
36838 if (slabp == kmem_slab_end(cachep)) {
36839 /* Need a new slab. Release the lock before calling
36840 * kmem_cache_grow(). This allows objs to be
36841 * released back into the cache while growing. */
36842 spin_unlock_irqrestore(&cachep->c_spinlock,
36843 save_flags);
36844 if (kmem_cache_grow(cachep, flags)) {
36845 /* Someone may have stolen our objs. Doesn't
36846 * matter, we'll just come back here again. */
36847 spin_lock_irq(&cachep->c_spinlock);
36848 goto try_again;
36849 }
36850 /* Couldn't grow, but some objs may have been
36851 * freed. */
36852 spin_lock_irq(&cachep->c_spinlock);
36853 if (cachep->c_freep != kmem_slab_end(cachep)) {
36854 if ((flags & SLAB_ATOMIC) == 0)
36855 goto try_again;
36856 }
36857 } else {
36858 /* Very serious error - maybe panic() here? */
36859 kmem_report_alloc_err("Bad slab magic (corrupt)",
36860 cachep);
36861 }
36862 spin_unlock_irqrestore(&cachep->c_spinlock,
36863 save_flags);
36864 err_exit:
36865 return NULL;
36866 nul_ptr:
36867 kmem_report_alloc_err("NULL ptr", NULL);
36868 goto err_exit;
36869 }
36870
36871 /* Release an obj back to its cache. If the obj has a
36872 * constructed state, it should be in this state _before_
36873 * it is released. */
36874 static inline void
36875 __kmem_cache_free(kmem_cache_t *cachep, const void *objp)
36876 {
36877 kmem_slab_t *slabp;
36878 kmem_bufctl_t *bufp;
36879 unsigned long save_flags;
36880
36881 /* Basic sanity checks. */
36882 if (!cachep || !objp)
36883 goto null_addr;
36884
36885 #if SLAB_DEBUG_SUPPORT
36886 /* A verify func is called without the cache-lock
36887 * held. */
36888 if (cachep->c_flags & SLAB_DEBUG_INITIAL)
36889 goto init_state_check;
36890 finished_initial:
36891
36892 if (cachep->c_flags & SLAB_RED_ZONE)
36893 goto red_zone;
36894 return_red:
36895 #endif /* SLAB_DEBUG_SUPPORT */
36896
36897 spin_lock_irqsave(&cachep->c_spinlock, save_flags);
36898
36899 if (SLAB_BUFCTL(cachep->c_flags))
36900 goto bufctl;
36901 bufp = (kmem_bufctl_t *)(objp+cachep->c_offset);
36902
36903 /* Get slab for the object. */
36904 #if 0
36905 /* _NASTY_IF/ELSE_, but avoids a 'distant' memory ref
36906 * for some objects. Is this worth while? XXX */
36907 if (cachep->c_flags & SLAB_HIGH_PACK)
36908 slabp = SLAB_GET_PAGE_SLAB(&mem_map[MAP_NR(bufp)]);
36909 else
36910 #endif
36911 slabp = bufp->buf_slabp;
36912
36913 check_magic:
36914 /* Sanity check. */
36915 if (slabp->s_magic != SLAB_MAGIC_ALLOC)
36916 goto bad_slab;
36917
36918 #if SLAB_DEBUG_SUPPORT
36919 if (cachep->c_flags & SLAB_DEBUG_FREE)
36920 goto extra_checks;
36921 passed_extra:
36922 #endif /* SLAB_DEBUG_SUPPORT */
36923
36924 if (slabp->s_inuse) { /* Sanity check. */
36925 SLAB_STATS_DEC_ACTIVE(cachep);
36926 slabp->s_inuse--;
36927 bufp->buf_nextp = slabp->s_freep;
36928 slabp->s_freep = bufp;
36929 if (bufp->buf_nextp) {
36930 if (slabp->s_inuse) {
36931 /* (hopefully) The most common case. */
36932 finished:
36933 #if SLAB_DEBUG_SUPPORT
36934 if (cachep->c_flags & SLAB_POISON) {
36935 if (cachep->c_flags & SLAB_RED_ZONE)
36936 objp += BYTES_PER_WORD;
36937 kmem_poison_obj(cachep, objp);
36938 }
36939 #endif /* SLAB_DEBUG_SUPPORT */
36940 spin_unlock_irqrestore(&cachep->c_spinlock,
36941 save_flags);
36942 return;
36943 }
36944 kmem_cache_full_free(cachep, slabp);
36945 goto finished;
36946 }
36947 kmem_cache_one_free(cachep, slabp);
36948 goto finished;
36949 }
36950
36951 /* Don't add to freelist. */
36952 spin_unlock_irqrestore(&cachep->c_spinlock,
36953 save_flags);
36954 kmem_report_free_err("free with no active objs",
36955 objp, cachep);
36956 return;
36957 bufctl:
36958 /* No 'extra' checks are performed for objs stored this
36959 * way; finding the obj is check enough. */
36960 slabp = SLAB_GET_PAGE_SLAB(&mem_map[MAP_NR(objp)]);
36961 bufp = &slabp->s_index[(objp - slabp->s_mem) /
36962 cachep->c_offset];
36963 if (bufp->buf_objp == objp)
36964 goto check_magic;
36965 spin_unlock_irqrestore(&cachep->c_spinlock,
36966 save_flags);
36967 kmem_report_free_err("Either bad obj addr or double "
36968 "free", objp, cachep);
36969 return;
36970 #if SLAB_DEBUG_SUPPORT
36971 init_state_check:
36972 /* Need to call the slab's constructor so the caller
36973 * can perform a verify of its state (debugging). */
36974 cachep->c_ctor(objp, cachep,
36975 SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY);
36976 goto finished_initial;
36977 extra_checks:
36978 if (!kmem_extra_free_checks(cachep, slabp->s_freep,
36979 bufp, objp)) {
36980 spin_unlock_irqrestore(&cachep->c_spinlock,
36981 save_flags);
36982 kmem_report_free_err("Double free detected during "
36983 "checks", objp, cachep);
36984 return;
36985 }
36986 goto passed_extra;
36987 red_zone:
36988 /* We do not hold the cache-lock while checking the
36989 * red-zone. */
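/* Editor's sketch (not part of the original source): the layout
 * these red-zone checks assume.  With SLAB_RED_ZONE set the obj
 * is bracketed by two guard words and the caller is handed a
 * pointer just past the first one:
 *
 *   | guard | object (c_org_size bytes) | guard |
 *           ^-- objp as seen by the caller
 *
 * Both guard words hold SLAB_RED_MAGIC2 while the obj is
 * allocated; the xchg()s below swap them back to SLAB_RED_MAGIC1,
 * so reading any other value means a write outside the obj or a
 * double free. */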
36990 objp -= BYTES_PER_WORD;
36991 if (xchg((unsigned long *)objp, SLAB_RED_MAGIC1) !=
36992 SLAB_RED_MAGIC2) {
36993 /* Either write before start of obj, or a double
36994 * free. */
36995 kmem_report_free_err("Bad front redzone", objp,
36996 cachep);
36997 }
36998 if (xchg((unsigned long *)
36999 (objp+cachep->c_org_size+BYTES_PER_WORD),
37000 SLAB_RED_MAGIC1) != SLAB_RED_MAGIC2) {
37001 /* Either write past end of obj, or a double free. */
37002 kmem_report_free_err("Bad rear redzone",
37003 objp, cachep);
37004 }
37005 goto return_red;
37006 #endif /* SLAB_DEBUG_SUPPORT */
37007
37008 bad_slab:
37009 /* Slab doesn't contain the correct magic num. */
37010 if (slabp->s_magic == SLAB_MAGIC_DESTROYED) {
37011 /* Magic num says this is a destroyed slab. */
37012 kmem_report_free_err("free from inactive slab",
37013 objp, cachep);
37014 } else
37015 kmem_report_free_err("Bad obj addr", objp, cachep);
37016 spin_unlock_irqrestore(&cachep->c_spinlock,
37017 save_flags);
37018
37019 #if 1
37020 /* FORCE A KERNEL DUMP WHEN THIS HAPPENS. SPEAK IN ALL
37021 * CAPS. GET THE CALL CHAIN. */
37022 *(int *) 0 = 0;
37023 #endif
37024
37025 return;
37026 null_addr:
37027 kmem_report_free_err("NULL ptr", objp, cachep);
37028 return;
37029 }
37030
37031 void *
37032 kmem_cache_alloc(kmem_cache_t *cachep, int flags)
37033 {
37034 return __kmem_cache_alloc(cachep, flags);
37035 }
37036
37037 void
37038 kmem_cache_free(kmem_cache_t *cachep, void *objp)
37039 {
37040 __kmem_cache_free(cachep, objp);
37041 }
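
The comment above __kmem_cache_free() requires that an object from a cache with a constructor be back in its constructed state when it is released. The sketch below shows a hypothetical client honouring that rule; every 'foo' identifier is invented for the example, and only the kmem_* calls come from this file.

struct foo {
	struct foo	*f_next;
	int		f_refcount;
};

static kmem_cache_t *foo_cachep;

/* Constructor: establishes the constructed state. */
static void foo_ctor(void *objp, kmem_cache_t *cachep, unsigned long flags)
{
	struct foo *f = objp;

	f->f_next = NULL;
	f->f_refcount = 0;
}

int foo_cache_init(void)
{
	foo_cachep = kmem_cache_create("foo_cache", sizeof(struct foo),
				       0, 0, foo_ctor, NULL);
	return foo_cachep ? 0 : -1;
}

void foo_free(struct foo *f)
{
	/* Restore the constructed state _before_ the release, as
	 * required by the comment above __kmem_cache_free(). */
	f->f_next = NULL;
	f->f_refcount = 0;
	kmem_cache_free(foo_cachep, f);
}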
37042
37043 void *
37044 kmalloc(size_t size, int flags)
37045 {
37046 cache_sizes_t *csizep = cache_sizes;
37047
37048 for (; csizep->cs_size; csizep++) {
37049 if (size > csizep->cs_size)
37050 continue;
37051 return __kmem_cache_alloc(csizep->cs_cachep, flags);
37052 }
37053 printk(KERN_ERR "kmalloc: Size (%lu) too large\n",
37054 (unsigned long) size);
37055 return NULL;
37056 }
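
The loop above is a first-fit walk: the first general cache whose cs_size is at least the requested size backs the allocation. The helper below merely restates that walk for illustration; kmalloc_size_class() is not a function in this file.

static size_t kmalloc_size_class(size_t size)
{
	cache_sizes_t *csizep = cache_sizes;

	for (; csizep->cs_size; csizep++) {
		if (size > csizep->cs_size)
			continue;
		return csizep->cs_size;	/* smallest class that fits */
	}
	return 0;	/* larger than any general cache */
}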
37057
37058 void
37059 kfree(const void *objp)
37060 {
37061 struct page *page;
37062 int nr;
37063
37064 if (!objp)
37065 goto null_ptr;
37066 nr = MAP_NR(objp);
37067 if (nr >= max_mapnr)
37068 goto bad_ptr;
37069
37070 /* Assume we own the page structure - hence no locking.
37071 * If someone is misbehaving (for example, calling us
37072 * with a bad address), then access to the page
37073 * structure can race with the kmem_slab_destroy()
37074 * code. Need to add a spin_lock to each page
37075 * structure, which would be useful in threading the
37076 * gfp() functions.... */
37077 page = &mem_map[nr];
37078 if (PageSlab(page)) {
37079 kmem_cache_t *cachep;
37080
37081 /* Here, we again assume the obj address is good. If
37082 * it isn't, and happens to map onto another general
37083 * cache page which has no active objs, then we race.
37084 */
37085 cachep = SLAB_GET_PAGE_CACHE(page);
37086 if (cachep &&
37087 (cachep->c_flags & SLAB_CFLGS_GENERAL)) {
37088 __kmem_cache_free(cachep, objp);
37089 return;
37090 }
37091 }
37092 bad_ptr:
37093 printk(KERN_ERR "kfree: Bad obj %p\n", objp);
37094
37095 #if 1
37096 /* FORCE A KERNEL DUMP WHEN THIS HAPPENS. SPEAK IN ALL
37097 * CAPS. GET THE CALL CHAIN. */
37098 *(int *) 0 = 0;
37099 #endif
37100
37101 null_ptr:
37102 return;
37103 }
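
The pointer validation in kfree() can be read as an address-to-cache lookup. The helper below is an illustrative sketch of that lookup using the same macros as above; obj_to_cachep() does not exist in this file.

static kmem_cache_t *obj_to_cachep(const void *objp)
{
	struct page *page;
	int nr = MAP_NR(objp);

	if (nr >= max_mapnr)
		return NULL;		/* not a managed page */
	page = &mem_map[nr];
	if (!PageSlab(page))
		return NULL;		/* page is not part of a slab */
	return SLAB_GET_PAGE_CACHE(page);
}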
37104
37105 void
37106 kfree_s(const void *objp, size_t size)
37107 {
37108 struct page *page;
37109 int nr;
37110
37111 if (!objp)
37112 goto null_ptr;
37113 nr = MAP_NR(objp);
37114 if (nr >= max_mapnr)
37115 goto null_ptr;
37116 /* See comment in kfree() */
37117 page = &mem_map[nr];
37118 if (PageSlab(page)) {
37119 kmem_cache_t *cachep;
37120 /* See comment in kfree() */
37121 cachep = SLAB_GET_PAGE_CACHE(page);
37122 if (cachep && cachep->c_flags & SLAB_CFLGS_GENERAL) {
37123 if (size <= cachep->c_org_size) {
37124 /* XXX better check */
37125 __kmem_cache_free(cachep, objp);
37126 return;
37127 }
37128 }
37129 }
37130 null_ptr:
37131 printk(KERN_ERR "kfree_s: Bad obj %p\n", objp);
37132 return;
37133 }
37134
37135 kmem_cache_t *
37136 kmem_find_general_cachep(size_t size)
37137 {
37138 cache_sizes_t *csizep = cache_sizes;
37139
37140 /* This function could be moved to the header file, and
37141 * made inline so consumers can quickly determine what
37142 * cache pointer they require.
37143 */
37144 for (; csizep->cs_size; csizep++) {
37145 if (size > csizep->cs_size)
37146 continue;
37147 break;
37148 }
37149 return csizep->cs_cachep;
37150 }
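
One way a consumer might use this, along the lines the comment suggests: look the general cache up once at init time, then allocate from it directly and skip the size walk inside kmalloc() on each call. The 'bar' names and the 200-byte size below are hypothetical.

static kmem_cache_t *bar_cachep;

int bar_init(void)
{
	bar_cachep = kmem_find_general_cachep(200);
	return bar_cachep ? 0 : -1;
}

void *bar_alloc(int flags)
{
	return kmem_cache_alloc(bar_cachep, flags);
}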
37151
37152
37153 /* Called from try_to_free_page().
37154 * This function _cannot_ be called within an interrupt,
37155 * but it can be interrupted.
37156 */
37157 void
37158 kmem_cache_reap(int gfp_mask)
37159 {
37160 kmem_slab_t *slabp;
37161 kmem_cache_t *searchp;
37162 kmem_cache_t *best_cachep;
37163 unsigned int scan;
37164 unsigned int reap_level;
37165
37166 if (in_interrupt()) {
37167 printk("kmem_cache_reap() called within int!\n");
37168 return;
37169 }
37170
37171 /* We really need a test (try-down) semaphore op so we can
37172 * avoid sleeping when the caller cannot wait. */
37173 down(&cache_chain_sem);
37174
37175 scan = 10;
37176 reap_level = 0;
37177
37178 best_cachep = NULL;
37179 searchp = clock_searchp;
37180 do {
37181 unsigned int full_free;
37182 unsigned int dma_flag;
37183
37184 /* It's safe to test this without holding the
37185 * cache-lock. */
37186 if (searchp->c_flags & SLAB_NO_REAP)
37187 goto next;
37188 spin_lock_irq(&searchp->c_spinlock);
37189 if (searchp->c_growing)
37190 goto next_unlock;
37191 if (searchp->c_dflags & SLAB_CFLGS_GROWN) {
37192 searchp->c_dflags &= ~SLAB_CFLGS_GROWN;
37193 goto next_unlock;
37194 }
37195 /* Sanity check for corruption of static values. */
37196 if (searchp->c_inuse ||
37197 searchp->c_magic != SLAB_C_MAGIC) {
37198 spin_unlock_irq(&searchp->c_spinlock);
37199 printk(KERN_ERR "kmem_reap: Corrupted cache struct"
37200 " for %s\n", searchp->c_name);
37201 goto next;
37202 }
37203 dma_flag = 0;
37204 full_free = 0;
37205
37206 /* Count the fully free slabs. There should not be
37207 * many, since we are holding the cache lock. */
37208 slabp = searchp->c_lastp;
37209 while (!slabp->s_inuse &&
37210 slabp != kmem_slab_end(searchp)) {
37211 slabp = slabp->s_prevp;
37212 full_free++;
37213 if (slabp->s_dma)
37214 dma_flag++;
37215 }
37216 spin_unlock_irq(&searchp->c_spinlock);
37217
37218 if ((gfp_mask & GFP_DMA) && !dma_flag)
37219 goto next;
37220
37221 if (full_free) {
37222 if (full_free >= 10) {
37223 best_cachep = searchp;
37224 break;
37225 }
37226
37227 /* Try to avoid slabs with constructors and/or more
37228 * than one page per slab (as it can be difficult
37229 * to get high orders from gfp()). */
37230 if (full_free >= reap_level) {
37231 reap_level = full_free;
37232 best_cachep = searchp;
37233 }
37234 }
37235 goto next;
37236 next_unlock:
37237 spin_unlock_irq(&searchp->c_spinlock);
37238 next:
37239 searchp = searchp->c_nextp;
37240 } while (--scan && searchp != clock_searchp);
37241
37242 clock_searchp = searchp;
37243 up(&cache_chain_sem);
37244
37245 if (!best_cachep) {
37246 /* couldn't find anything to reap */
37247 return;
37248 }
37249
37250 spin_lock_irq(&best_cachep->c_spinlock);
37251 while (!best_cachep->c_growing &&
37252 !(slabp = best_cachep->c_lastp)->s_inuse &&
37253 slabp != kmem_slab_end(best_cachep)) {
37254 if (gfp_mask & GFP_DMA) {
37255 do {
37256 if (slabp->s_dma)
37257 goto good_dma;
37258 slabp = slabp->s_prevp;
37259 } while (!slabp->s_inuse &&
37260 slabp != kmem_slab_end(best_cachep));
37261
37262 /* Didn't find a DMA slab (there was a free one -
37263 * it must have become active). */
37264 goto dma_fail;
37265 good_dma:
37266 }
37267 if (slabp == best_cachep->c_freep)
37268 best_cachep->c_freep = slabp->s_nextp;
37269 kmem_slab_unlink(slabp);
37270 SLAB_STATS_INC_REAPED(best_cachep);
37271
37272 /* Safe to drop the lock. The slab is no longer
37273 * linked to the cache. */
37274 spin_unlock_irq(&best_cachep->c_spinlock);
37275 kmem_slab_destroy(best_cachep, slabp);
37276 spin_lock_irq(&best_cachep->c_spinlock);
37277 }
37278 dma_fail:
37279 spin_unlock_irq(&best_cachep->c_spinlock);
37280 return;
37281 }
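
Stripped of the locking, DMA and growing/NO_REAP handling, the victim-selection policy above reduces to the sketch below: remember the cache with the most fully free slabs seen during the scan, and stop early once any cache has ten or more. Neither helper exists in this file; they only restate the loops above.

static unsigned int count_full_free(kmem_cache_t *cachep)
{
	kmem_slab_t *slabp = cachep->c_lastp;
	unsigned int n = 0;

	/* Free slabs are kept at the tail of the slab chain. */
	while (!slabp->s_inuse && slabp != kmem_slab_end(cachep)) {
		slabp = slabp->s_prevp;
		n++;
	}
	return n;
}

static kmem_cache_t *pick_reap_victim(kmem_cache_t *start, unsigned int scan)
{
	kmem_cache_t *searchp = start;
	kmem_cache_t *best = NULL;
	unsigned int best_free = 0;

	do {
		unsigned int full_free = count_full_free(searchp);

		if (full_free >= 10)
			return searchp;	/* plenty to reap, stop scanning */
		if (full_free && full_free >= best_free) {
			best_free = full_free;
			best = searchp;
		}
		searchp = searchp->c_nextp;
	} while (--scan && searchp != start);

	return best;
}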
37282
37283 #if SLAB_SELFTEST
37284 /* A few v. simple tests */
37285 static void
37286 kmem_self_test(void)
37287 {
37288 kmem_cache_t *test_cachep;
37289
37290 printk(KERN_INFO "kmem_test() - start\n");
37291 test_cachep =
37292 kmem_cache_create("test-cachep", 16, 0,
37293 SLAB_RED_ZONE|SLAB_POISON,
37294 NULL, NULL);
37295 if (test_cachep) {
37296 char *objp =
37297 kmem_cache_alloc(test_cachep, SLAB_KERNEL);
37298 if (objp) {
37299 /* Write in front and past end, red-zone test. */
37300 *(objp-1) = 1;
37301 *(objp+16) = 1;
37302 kmem_cache_free(test_cachep, objp);
37303
37304 /* Mess up poisoning. */
37305 *objp = 10;
37306 objp = kmem_cache_alloc(test_cachep, SLAB_KERNEL);
37307 kmem_cache_free(test_cachep, objp);
37308
37309 /* Mess up poisoning (again). */
37310 *objp = 10;
37311 kmem_cache_shrink(test_cachep);
37312 }
37313 }
37314 printk(KERN_INFO "kmem_test() - finished\n");
37315 }
37316 #endif /* SLAB_SELFTEST */
37317
37318 #if defined(CONFIG_PROC_FS)
37319 /* /proc/slabinfo
37320 * cache-name num-active-objs total-objs num-active-slabs
37321 * ... total-slabs num-pages-per-slab
37322 */
37323 int
37324 get_slabinfo(char *buf)
37325 {
37326 kmem_cache_t *cachep;
37327 kmem_slab_t *slabp;
37328 unsigned long active_objs;
37329 unsigned long save_flags;
37330 unsigned long num_slabs;
37331 unsigned long num_objs;
37332 int len=0;
37333 #if SLAB_STATS
37334 unsigned long active_slabs;
37335 #endif /* SLAB_STATS */
37336
37337 __save_flags(save_flags);
37338
37339 /* Output format version, so at least we can change it
37340 * without _too_ many complaints. */
37341 #if SLAB_STATS
37342 len = sprintf(buf,
37343 "slabinfo - version: 1.0 (statistics)\n");
37344 #else
37345 len = sprintf(buf, "slabinfo - version: 1.0\n");
37346 #endif /* SLAB_STATS */
37347 down(&cache_chain_sem);
37348 cachep = &cache_cache;
37349 do {
37350 #if SLAB_STATS
37351 active_slabs = 0;
37352 #endif /* SLAB_STATS */
37353 num_slabs = active_objs = 0;
37354 spin_lock_irq(&cachep->c_spinlock);
37355 for (slabp = cachep->c_firstp;
37356 slabp != kmem_slab_end(cachep);
37357 slabp = slabp->s_nextp) {
37358 active_objs += slabp->s_inuse;
37359 num_slabs++;
37360 #if SLAB_STATS
37361 if (slabp->s_inuse)
37362 active_slabs++;
37363 #endif /* SLAB_STATS */
37364 }
37365 num_objs = cachep->c_num*num_slabs;
37366 #if SLAB_STATS
37367 {
37368 unsigned long errors;
37369 unsigned long high = cachep->c_high_mark;
37370 unsigned long grown = cachep->c_grown;
37371 unsigned long reaped = cachep->c_reaped;
37372 unsigned long allocs = cachep->c_num_allocations;
37373 errors =
37374 (unsigned long) atomic_read(&cachep->c_errors);
37375 spin_unlock_irqrestore(&cachep->c_spinlock,
37376 save_flags);
37377 len += sprintf(buf+len,
37378 "%-16s %6lu %6lu %4lu %4lu %4lu "
37379 "%6lu %7lu %5lu %4lu %4lu\n",
37380 cachep->c_name, active_objs,
37381 num_objs, active_slabs, num_slabs,
37382 (1<<cachep->c_gfporder)*num_slabs,
37383 high, allocs, grown, reaped,errors);
37384 }
37385 #else
37386 spin_unlock_irqrestore(&cachep->c_spinlock,
37387 save_flags);
37388 len += sprintf(buf+len, "%-17s %6lu %6lu\n",
37389 cachep->c_name, active_objs,num_objs);
37390 #endif /* SLAB_STATS */
37391 } while ((cachep = cachep->c_nextp) != &cache_cache);
37392 up(&cache_chain_sem);
37393
37394 return len;
37395 }
37396 #endif /* CONFIG_PROC_FS */
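
A small user-space sketch of reading this file back. It assumes the non-statistics build, where each data line carries three columns per the "%-17s %6lu %6lu" format above; the program itself is illustrative and not part of the kernel.

#include <stdio.h>

int main(void)
{
	char line[256], name[32];
	unsigned long active, total;
	FILE *f = fopen("/proc/slabinfo", "r");

	if (!f)
		return 1;
	fgets(line, sizeof(line), f);		/* skip the version header */
	while (fscanf(f, "%31s %lu %lu", name, &active, &total) == 3)
		printf("%-17s %lu/%lu objs in use\n", name, active, total);
	fclose(f);
	return 0;
}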