
mm/slab.c

35266 /*
35267  * linux/mm/slab.c
35268  * Written by Mark Hemment, 1996/97.
35269  * (markhe@nextd.demon.co.uk)
35270  *
35271  * 11 April '97.  Started multi-threading - markhe
35272  *      The global cache-chain is protected by the
35273  *      semaphore 'cache_chain_sem'.  The sem is only
35274  *      needed when accessing/extending the cache-chain,
35275  *      which can never happen inside an interrupt
35276  *      (kmem_cache_create(), kmem_cache_shrink() and
35277  *      kmem_cache_reap()).  This is a medium-term
35278  *      exclusion lock.
35279  *
35280  *      Each cache has its own lock; 'c_spinlock'.  This
35281  *      lock is needed only when accessing non-constant
35282  *      members of a cache-struct.  Note: 'constant
35283  *      members' are assigned a value in
35284  *      kmem_cache_create() before the cache is linked
35285  *      into the cache-chain.  The values never change,
35286  *      so not even a multi-reader lock is needed for
35287  *      these members.  The c_spinlock is only ever held
35288  *      for a few cycles.
35289  *
35290  *      To prevent kmem_cache_shrink() trying to shrink a
35292  *      'growing' cache (which may be sleeping and
35292  *      therefore not holding the semaphore/lock), the
35293  *      c_growing field is used.  This also prevents
35294  *      reaping from a cache.
35295  *
35296  *      Note, caches can _never_ be destroyed.  When a
35297  *      sub-system (eg module) has finished with a cache,
35298  *      it can only be shrunk.  This leaves the cache
35299  *      empty, but already enabled for re-use, eg. during
35300  *      a module re-load.
35301  *
35302  *      Notes:
35303  *              o Constructors/deconstructors are called
35304  *              while the cache-lock is _not_ held.
35305  *              Therefore they _must_ be threaded.
35306  *              o Constructors must not attempt to
35307  *              allocate memory from the same cache that
35308  *              they are a constructor for - infinite
35309  *              loop!  (There is no easy way to trap
35310  *              this.)
35311  *              o The per-cache locks must be obtained
35312  *              with local-interrupts disabled.
35313  *              o When compiled with debug support,
35314  *              and an object-verify (upon release)
35315  *              is requested for a cache, the
35316  *              verify-function is called with the
35317  *              cache lock held.  This helps debugging.
35318  *              o The functions called from
35319  *              try_to_free_page() must not attempt to
35320  *              allocate memory from a cache which is
35321  *              being grown.  The buffer sub-system might
35322  *              try to allocate memory, via
35323  *              buffer_cachep.  As this pri is passed to
35324  *              the SLAB, and then (if necessary) onto
35325  *              the gfp() funcs (which avoid calling
35326  *              try_to_free_page()), no deadlock should
35327  *              happen.
35328  *
35329  *      The positioning of the per-cache lock is tricky.
35330  *      If the lock is placed on the same h/w cache line
35331  *      as commonly accessed members the number of L1
35332  *      cache-line faults is reduced.  However, this can
35333  *      lead to the cache-line ping-ponging between
35334  *      processors when the lock is in contention (and
35335  *      the common members are being accessed).  Decided
35336  *      to keep it away from common members.
35337  *
35338  *      More fine-graining is possible, with per-slab
35339  *      locks...but this might be taking fine graining
35340  *      too far, but would have the advantage;
35341  *
35342  *              During most allocs/frees no writes occur
35343  *              to the cache-struct.  Therefore a
35344  *              multi-reader/one writer lock could be
35345  *              used (the writer needed when the slab
35346  *              chain is being link/unlinked).  As we
35347  *              would not have an exclusion lock for the
35348  *              cache-structure, one would be needed
35349  *              per-slab (for updating s_free ptr, and/or
35350  *              the contents of s_index).
35351  *
35352  *      The above locking would allow parallel operations
35353  *      to different slabs within the same cache with
35354  *      reduced spinning.
35355  *
35356  *      Per-engine slab caches, backed by a global cache
35357  *      (as in Mach's Zone allocator), would allow most
35358  *      allocations from the same cache to execute in
35359  *      parallel.
35360  *
35361  *      At present, each engine can be growing a cache.
35362  *      This should be blocked.
35363  *
35364  *      It is not currently 100% safe to examine the
35365  *      page_struct outside of a kernel or global cli
35366  *      lock.  The risk is v. small, and non-fatal.
35367  *
35368  *      Calls to printk() are not 100% safe (the function
35369  *      is not threaded).  However, printk() is only used
35370  *      under an error condition, and the risk is
35371  *      v. small (not sure if the console write functions
35372  *      'enjoy' executing multiple contexts in parallel.
35373  *      I guess they don't...).  Note, for most calls to
35374  *      printk() any held cache-lock is dropped.  This is
35375  *      not always done for text size reasons - having
35376  *      *_unlock() everywhere is bloat.  */
35377 
35378 /* An implementation of the Slab Allocator as described
35379  * in outline in;
35380  *     UNIX Internals: The New Frontiers by Uresh Vahalia
35381  *     Pub: Prentice Hall      ISBN 0-13-101908-2
35382  * or with a little more detail in;
35383  *     The Slab Allocator: An Object-Caching Kernel
35384  *     Memory Allocator
35385  *     Jeff Bonwick (Sun Microsystems).
35386  *     Presented at: USENIX Summer 1994 Technical
35387  *     Conference */
35388 
35389 /* This implementation deviates from Bonwick's paper as
35390  * it does not use a hash-table for large objects, but
35391  * rather a per slab index to hold the bufctls.  This
35392  * allows the bufctl structure to be small (one word),
35393  * but limits the number of objects a slab (not a cache)
35394  * can contain when off-slab bufctls are used.  The limit
35395  * is the size of the largest general cache that does not
35396  * use off-slab bufctls, divided by the size of a bufctl.
35397  * For 32bit archs, this is 256/4 = 64.  This is not
35398  * serious, as it is only for large objects, when it is
35399  * unwise to have too many per slab.
35400  *
35401  * Note: This limit can be raised by introducing a
35402  * general cache whose size is less than 512
35403  * (PAGE_SIZE<<3), but greater than 256.  */
35404 
35405 #include        <linux/config.h>
35406 #include        <linux/slab.h>
35407 #include        <linux/interrupt.h>
35408 #include        <linux/init.h>
35409 
35410 /* If there is a different PAGE_SIZE around, and it works
35411  * with this allocator, then change the following.  */
35412 #if     (PAGE_SIZE != 8192 && PAGE_SIZE != 4096)
35413 #error Your page size is probably not correctly         \
35414  supported - please check
35415 #endif
35416 
35417 /* SLAB_MGMT_CHECKS   - 1 to enable extra checks in
35418  *                        kmem_cache_create().
35419  *                      0 if you wish to reduce memory
35420  *                        usage.
35421  *
35422  * SLAB_DEBUG_SUPPORT - 1 for kmem_cache_create() to
35423  *                        honour; SLAB_DEBUG_FREE,
35424  *                        SLAB_DEBUG_INITIAL,
35425  *                        SLAB_RED_ZONE & SLAB_POISON.
35426  *                      0 for faster, smaller, code
35427  *                        (especially in the critical
35428  *                        paths).
35429  *
35430  * SLAB_STATS         - 1 to collect stats for
35431  *                        /proc/slabinfo.
35432  *                      0 for faster, smaller, code
35433  *                        (especially in the critical
35434  *                        paths).
35435  *
35436  * SLAB_SELFTEST      - 1 to perform a few tests, mainly
35437  *                      for development. */
35438 #define         SLAB_MGMT_CHECKS        1
35439 #define         SLAB_DEBUG_SUPPORT      0
35440 #define         SLAB_STATS              0
35441 #define         SLAB_SELFTEST           0
35442 
35443 /* Shouldn't this be in a header file somewhere? */
35444 #define BYTES_PER_WORD          sizeof(void *)
35445 
35446 /* Legal flag mask for kmem_cache_create(). */
35447 #if     SLAB_DEBUG_SUPPORT
35448 #if     0
35449 #define SLAB_C_MASK                                     \
35450   (SLAB_DEBUG_FREE|SLAB_DEBUG_INITIAL|SLAB_RED_ZONE|    \
35451    SLAB_POISON|SLAB_HWCACHE_ALIGN|SLAB_NO_REAP|         \
35452    SLAB_HIGH_PACK)                                      \
35453 #endif
35454 #define SLAB_C_MASK                                     \
35455   (SLAB_DEBUG_FREE|SLAB_DEBUG_INITIAL|SLAB_RED_ZONE|    \
35456    SLAB_POISON|SLAB_HWCACHE_ALIGN|SLAB_NO_REAP)
35457 #else
35458 #if     0
35459 #define SLAB_C_MASK                                     \
35460   (SLAB_HWCACHE_ALIGN|SLAB_NO_REAP|SLAB_HIGH_PACK)
35461 #endif
35462 #define SLAB_C_MASK                                     \
35463   (SLAB_HWCACHE_ALIGN|SLAB_NO_REAP)
35464 #endif  /* SLAB_DEBUG_SUPPORT */
35465 
35466 /* Slab management struct.  Manages the objs in a slab.
35467  * Placed either at the end of mem allocated for a slab,
35468  * or from an internal obj cache (cache_slabp).  Slabs
35469  * are chained into a partially ordered list; fully used
35470  * first, partial next, and then fully free slabs.  The
35471  * first 4 members are referenced during an alloc/free
35472  * operation, and should always appear on the same cache
35473  * line.  Note: The offset between some members _must_
35474  * match offsets within the kmem_cache_t - see
35475  * kmem_cache_init() for the checks. */
35476 
35477 /* could make this larger for 64bit archs */
35478 #define SLAB_OFFSET_BITS        16
35479 
35480 typedef struct kmem_slab_s {
35481   /* ptr to first inactive obj in slab */
35482   struct kmem_bufctl_s *s_freep;
35483   struct kmem_bufctl_s *s_index;
35484   unsigned long         s_magic;
35485   /* num of objs active in slab */
35486   unsigned long         s_inuse;
35487 
35488   struct kmem_slab_s   *s_nextp;
35489   struct kmem_slab_s   *s_prevp;
35490   /* addr of first obj in slab */
35491   void                 *s_mem;
35492   unsigned long         s_offset:SLAB_OFFSET_BITS,
35493                         s_dma:1;
35494 } kmem_slab_t;
35495 
35496 /* When the slab management is on-slab, this gives the
35497  * size to use. */
35498 #define slab_align_size                                 \
35499         (L1_CACHE_ALIGN(sizeof(kmem_slab_t)))
35500 
35501 /* Test for end of slab chain. */
35502 #define kmem_slab_end(x)                                \
35503   ((kmem_slab_t*)&((x)->c_offset))
35504 
35505 /* s_magic */
35506 #define SLAB_MAGIC_ALLOC     0xA5C32F2BUL /* alive */
35507 #define SLAB_MAGIC_DESTROYED 0xB2F23C5AUL /* destroyed */
35508 
35509 /* Bufctl's are used for linking objs within a slab,
35510  * identifying what slab an obj is in, and the address of
35511  * the associated obj (for sanity checking with off-slab
35512  * bufctls).  What a bufctl contains depends upon the
35513  * state of the obj and the organisation of the cache. */
35514 typedef struct kmem_bufctl_s {
35515   union {
35516     struct kmem_bufctl_s  *buf_nextp;
35517     kmem_slab_t           *buf_slabp; /* slab for obj */
35518     void *                 buf_objp;
35519   } u;
35520 } kmem_bufctl_t;
35521 
35522 /* ...shorthand... */
35523 #define buf_nextp       u.buf_nextp
35524 #define buf_slabp       u.buf_slabp
35525 #define buf_objp        u.buf_objp
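
Editor's note: the bufctl above is just a one-word link.  For on-slab
bufctls it lives immediately after the object it manages; kmem_cache_init_objs()
(further down) threads all of a slab's bufctls into the free list anchored at
s_freep, and __kmem_cache_alloc() then pops the head and steps back c_offset
bytes to reach the object.  A stand-alone userspace sketch of that layout, with
invented names and sizes (not kernel code):

#include <stdio.h>
#include <stdlib.h>

/* Illustrative stand-in for an on-slab kmem_bufctl_t:
 * a one-word link stored right after each object. */
struct bufctl { struct bufctl *next; };

#define OBJ_SIZE  32                   /* payload bytes */
#define STRIDE    (OBJ_SIZE + sizeof(struct bufctl))
#define NUM_OBJS  4

int main(void)
{
    char *mem = malloc(NUM_OBJS * STRIDE);
    struct bufctl *freep = NULL, **prev = &freep;
    int i;

    /* Build the free list: each bufctl sits OBJ_SIZE
     * bytes past its object and points at the next. */
    for (i = 0; i < NUM_OBJS; i++) {
        struct bufctl *b = (struct bufctl *)
            (mem + i*STRIDE + OBJ_SIZE);
        *prev = b;
        prev  = &b->next;
    }
    *prev = NULL;

    /* "Allocate": pop the head, then step back from the
     * bufctl to the object (objp = bufp - c_offset). */
    while (freep) {
        struct bufctl *b = freep;
        void *objp = (char *)b - OBJ_SIZE;
        freep = b->next;
        printf("alloc obj at %p\n", objp);
    }
    free(mem);
    return 0;
}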
35526 
35527 #if     SLAB_DEBUG_SUPPORT
35528 /* Magic nums for obj red zoning.  Placed in the first
35529  * word before and the first word after an obj.  */
35530 #define SLAB_RED_MAGIC1 0x5A2CF071UL /* obj active */
35531 #define SLAB_RED_MAGIC2 0x170FC2A5UL /* obj inactive */
35532 
35533 /* ...and for poisoning */
35534 #define SLAB_POISON_BYTE 0x5a /* byte val for poisoning*/
35535 #define SLAB_POISON_END  0xa5 /* end-byte of poisoning */
35536 
35537 #endif  /* SLAB_DEBUG_SUPPORT */
35538 
35539 /* Cache struct - manages a cache.  First four members
35540  * are commonly referenced during an alloc/free
35541  * operation.  */
35542 struct kmem_cache_s {
35543   kmem_slab_t   *c_freep;       /* first w/ free objs */
35544   unsigned long c_flags;        /* constant flags */
35545   unsigned long c_offset;
35546   unsigned long c_num;          /* # of objs per slab */
35547 
35548   unsigned long c_magic;
35549   unsigned long c_inuse;        /* kept at zero */
35550   kmem_slab_t   *c_firstp;      /* first slab in chain */
35551   kmem_slab_t   *c_lastp;       /* last slab in chain */
35552 
35553   spinlock_t    c_spinlock;
35554   unsigned long c_growing;
35555   unsigned long c_dflags;       /* dynamic flags */
35556   size_t        c_org_size;
35557   unsigned long c_gfporder;  /* ord pgs per slab (2^n) */
35558   /* constructor func */
35559   void (*c_ctor)(void *, kmem_cache_t *, unsigned long);
35560   /* de-constructor func */
35561   void (*c_dtor)(void *, kmem_cache_t *, unsigned long);
35562   unsigned long c_align;        /* alignment of objs */
35563   size_t        c_colour;       /* cache coloring range*/
35564   size_t        c_colour_next;  /* cache coloring */
35565   unsigned long c_failures;
35566   const char    *c_name;
35567   struct kmem_cache_s *c_nextp;
35568   kmem_cache_t  *c_index_cachep;
35569 #if     SLAB_STATS
35570   unsigned long c_num_active;
35571   unsigned long c_num_allocations;
35572   unsigned long c_high_mark;
35573   unsigned long c_grown;
35574   unsigned long c_reaped;
35575   atomic_t c_errors;
35576 #endif  /* SLAB_STATS */
35577 };
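
Editor's note: kmem_slab_end() (line 35502) works because the first members of
kmem_cache_s are laid out so that &cachep->c_offset can be treated as a dummy
kmem_slab_t whose s_nextp and s_prevp overlay c_firstp and c_lastp.  That is
what turns the slab chain into a circular list with the cache itself as the
sentinel, and it is exactly what the offset checks in kmem_cache_init() verify.
A minimal userspace sketch of the idea, using simplified structs with the same
member order (names and layout here are illustrative, not the kernel's):

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

/* Word-sized members in the same order as above. */
typedef struct slab_s {
    void *s_freep, *s_index;
    unsigned long s_magic, s_inuse;
    struct slab_s *s_nextp, *s_prevp;
} slab_t;

typedef struct cache_s {
    slab_t *c_freep;
    unsigned long c_flags, c_offset, c_num;
    unsigned long c_magic, c_inuse;
    slab_t *c_firstp, *c_lastp;
} cache_t;

#define slab_end(c) ((slab_t *)&(c)->c_offset)

int main(void)
{
    cache_t cache;

    /* The dummy slab overlays the cache so that its
     * s_nextp/s_prevp land on c_firstp/c_lastp. */
    assert(offsetof(cache_t, c_firstp) -
           offsetof(cache_t, c_offset) ==
           offsetof(slab_t, s_nextp));
    assert(offsetof(cache_t, c_lastp) -
           offsetof(cache_t, c_offset) ==
           offsetof(slab_t, s_prevp));

    /* An empty chain: first and last point at the
     * sentinel, as in the cache_cache initialiser. */
    cache.c_firstp = cache.c_lastp = slab_end(&cache);
    printf("empty? %d\n",
           cache.c_firstp == slab_end(&cache));
    return 0;
}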
35578 
35579 /* internal c_flags */
35580 /* slab management in own cache */
35581 #define SLAB_CFLGS_OFF_SLAB   0x010000UL
35582 /* bufctls in own cache */
35583 #define SLAB_CFLGS_BUFCTL     0x020000UL
35584 /* a general cache */
35585 #define SLAB_CFLGS_GENERAL    0x080000UL
35586 
35587 /* c_dflags (dynamic flags).  Need to hold the spinlock
35588  * to access this member */
35589 /* don't reap a recently grown */
35590 #define SLAB_CFLGS_GROWN      0x000002UL
35591 
35592 #define SLAB_OFF_SLAB(x)      ((x) & SLAB_CFLGS_OFF_SLAB)
35593 #define SLAB_BUFCTL(x)        ((x) & SLAB_CFLGS_BUFCTL)
35594 #define SLAB_GROWN(x)         ((x) & SLAB_CFLGS_GROWN)
35595 
35596 #if     SLAB_STATS
35597 #define SLAB_STATS_INC_ACTIVE(x)    ((x)->c_num_active++)
35598 #define SLAB_STATS_DEC_ACTIVE(x)    ((x)->c_num_active--)
35599 #define SLAB_STATS_INC_ALLOCED(x)                       \
35600                                ((x)->c_num_allocations++)
35601 #define SLAB_STATS_INC_GROWN(x)     ((x)->c_grown++)
35602 #define SLAB_STATS_INC_REAPED(x)    ((x)->c_reaped++)
35603 #define SLAB_STATS_SET_HIGH(x)                          \
35604   do {                                                  \
35605     if ((x)->c_num_active > (x)->c_high_mark)           \
35606       (x)->c_high_mark = (x)->c_num_active;             \
35607   } while (0)
35608 #define SLAB_STATS_INC_ERR(x)                           \
35609   (atomic_inc(&(x)->c_errors))
35610 #else
35611 #define SLAB_STATS_INC_ACTIVE(x)
35612 #define SLAB_STATS_DEC_ACTIVE(x)
35613 #define SLAB_STATS_INC_ALLOCED(x)
35614 #define SLAB_STATS_INC_GROWN(x)
35615 #define SLAB_STATS_INC_REAPED(x)
35616 #define SLAB_STATS_SET_HIGH(x)
35617 #define SLAB_STATS_INC_ERR(x)
35618 #endif  /* SLAB_STATS */
35619 
35620 #if     SLAB_SELFTEST
35621 #if     !SLAB_DEBUG_SUPPORT
35622 #error  Debug support needed for self-test
35623 #endif
35624 static void kmem_self_test(void);
35625 #endif  /* SLAB_SELFTEST */
35626 
35627 /* c_magic - used to detect 'out of slabs' in
35628  * __kmem_cache_alloc() */
35629 #define SLAB_C_MAGIC            0x4F17A36DUL
35630 
35631 /* maximum size of an obj (in 2^order pages) */
35632 #define SLAB_OBJ_MAX_ORDER      5       /* 32 pages */
35633 
35634 /* maximum num of pages for a slab (prevents large
35635  * requests to the VM layer) */
35636 #define SLAB_MAX_GFP_ORDER      5       /* 32 pages */
35637 
35638 /* the 'preferred' minimum num of objs per slab - maybe
35639  * less for large objs */
35640 #define SLAB_MIN_OBJS_PER_SLAB  4
35641 
35642 /* If the num of objs per slab is <=
35643  * SLAB_MIN_OBJS_PER_SLAB, then the page order must be
35644  * less than this before trying the next order.  */
35645 #define SLAB_BREAK_GFP_ORDER_HI 2
35646 #define SLAB_BREAK_GFP_ORDER_LO 1
35647 static int slab_break_gfp_order =
35648  SLAB_BREAK_GFP_ORDER_LO;
35649 
35650 /* Macros for storing/retrieving the cachep and or slab
35651  * from the global 'mem_map'.  With off-slab bufctls,
35652  * these are used to find the slab an obj belongs to.
35653  * With kmalloc(), and kfree(), these are used to find
35654  * the cache which an obj belongs to.  */
35655 #define SLAB_SET_PAGE_CACHE(pg, x)                      \
35656   ((pg)->next = (struct page *)(x))
35657 #define SLAB_GET_PAGE_CACHE(pg)                         \
35658   ((kmem_cache_t *)(pg)->next)
35659 #define SLAB_SET_PAGE_SLAB(pg, x)                       \
35660   ((pg)->prev = (struct page *)(x))
35661 #define SLAB_GET_PAGE_SLAB(pg)                          \
35662   ((kmem_slab_t *)(pg)->prev)
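
Editor's note: these macros borrow the next/prev fields of each page's struct
page to record which cache and slab the page belongs to; kmem_cache_grow()
stores the pointers for every page of a new slab, and the free paths recover
them from nothing more than an object's address.  A userspace sketch of that
round trip with a toy mem_map and MAP_NR (all names and values here are
invented for illustration):

#include <stdio.h>
#include <stdlib.h>

#define PAGE_SHIFT 12
#define NPAGES     16

/* Toy page descriptor: just the two slots the slab
 * code borrows. */
struct page { void *next, *prev; };

static struct page mem_map[NPAGES];
static char *pool;              /* base of the "memory" */

/* Toy MAP_NR(): address -> index into mem_map. */
static unsigned long map_nr(void *addr)
{
    return (unsigned long)((char *)addr - pool)
           >> PAGE_SHIFT;
}

int main(void)
{
    void *cachep = (void *)0x1111;   /* stand-in ptrs  */
    void *slabp  = (void *)0x2222;
    void *objp;

    pool = malloc((size_t)NPAGES << PAGE_SHIFT);

    /* The grow path records both pointers for each page
     * backing the slab... */
    objp = pool + (3 << PAGE_SHIFT) + 200;
    mem_map[map_nr(objp)].next = cachep;
    mem_map[map_nr(objp)].prev = slabp;

    /* ...so the free path can find the cache and slab
     * from just the object address. */
    printf("cache=%p slab=%p\n",
           mem_map[map_nr(objp)].next,
           mem_map[map_nr(objp)].prev);
    free(pool);
    return 0;
}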
35663 
35664 /* Size description struct for general caches. */
35665 typedef struct cache_sizes {
35666   size_t           cs_size;
35667   kmem_cache_t    *cs_cachep;
35668 } cache_sizes_t;
35669 
35670 static cache_sizes_t cache_sizes[] = {
35671 #if     PAGE_SIZE == 4096
35672   {  32,          NULL},
35673 #endif
35674   {  64,          NULL},
35675   { 128,          NULL},
35676   { 256,          NULL},
35677   { 512,          NULL},
35678   {1024,          NULL},
35679   {2048,          NULL},
35680   {4096,          NULL},
35681   {8192,          NULL},
35682   {16384,         NULL},
35683   {32768,         NULL},
35684   {65536,         NULL},
35685   {131072,        NULL},
35686   {0,             NULL}
35687 };
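
Editor's note: kmalloc() (later in this file) walks this table in ascending
order and takes the first general cache whose cs_size can hold the request.
A minimal userspace sketch of that selection, with cache names standing in
for the cs_cachep pointers (the table below is an illustrative subset):

#include <stdio.h>

typedef struct cache_sizes {
    unsigned long cs_size;
    const char   *cs_name;  /* stands in for cs_cachep */
} cache_sizes_t;

static cache_sizes_t sizes[] = {
    {   32, "size-32"   }, {   64, "size-64"   },
    {  128, "size-128"  }, {  256, "size-256"  },
    {  512, "size-512"  }, { 1024, "size-1024" },
    { 2048, "size-2048" }, { 4096, "size-4096" },
    {    0, NULL        }           /* terminator */
};

/* First fit over the ascending, zero-terminated table,
 * as kmalloc() does. */
static const char *pick_general_cache(unsigned long n)
{
    cache_sizes_t *csp;
    for (csp = sizes; csp->cs_size; csp++)
        if (n <= csp->cs_size)
            return csp->cs_name;
    return NULL;        /* larger than any size class */
}

int main(void)
{
    printf("100 bytes -> %s\n", pick_general_cache(100));
    printf("600 bytes -> %s\n", pick_general_cache(600));
    return 0;
}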
35688 
35689 /* Names for the general caches.  Not placed into the
35690  * sizes struct for a good reason; the string ptr is not
35691  * needed while searching in kmalloc(), and would
35692  * 'get-in-the-way' in the h/w cache.  */
35693 static char *cache_sizes_name[] = {
35694 #if     PAGE_SIZE == 4096
35695   "size-32",
35696 #endif
35697   "size-64",
35698   "size-128",
35699   "size-256",
35700   "size-512",
35701   "size-1024",
35702   "size-2048",
35703   "size-4096",
35704   "size-8192",
35705   "size-16384",
35706   "size-32768",
35707   "size-65536",
35708   "size-131072"
35709 };
35710 
35711 /* internal cache of cache description objs */
35712 static kmem_cache_t cache_cache = {
35713 /* freep, flags */        kmem_slab_end(&cache_cache),
35714                           SLAB_NO_REAP,
35715 /* offset, num */         sizeof(kmem_cache_t),   0,
35716 /* c_magic, c_inuse */    SLAB_C_MAGIC, 0,
35717 /* firstp, lastp */       kmem_slab_end(&cache_cache),
35718                           kmem_slab_end(&cache_cache),
35719 /* spinlock */            SPIN_LOCK_UNLOCKED,
35720 /* growing */             0,
35721 /* dflags */              0,
35722 /* org_size, gfp */       0, 0,
35723 /* ctor, dtor, align */   NULL, NULL, L1_CACHE_BYTES,
35724 /* colour, colour_next */ 0, 0,
35725 /* failures */            0,
35726 /* name */                "kmem_cache",
35727 /* nextp */               &cache_cache,
35728 /* index */               NULL,
35729 };
35730 
35731 /* Guard access to the cache-chain. */
35732 static struct semaphore cache_chain_sem;
35733 
35734 /* Place maintainer for reaping. */
35735 static  kmem_cache_t    *clock_searchp = &cache_cache;
35736 
35737 /* Internal slab management cache, for when slab
35738  * management is off-slab. */
35739 static kmem_cache_t     *cache_slabp = NULL;
35740 
35741 /* Max number of objs-per-slab for caches which use
35742  * bufctl's.  Needed to avoid a possible looping
35743  * condition in kmem_cache_grow().  */
35744 static unsigned long bufctl_limit = 0;
35745 
35746 /* Initialisation - setup the `cache' cache. */
35747 long __init kmem_cache_init(long start, long end)
35748 {
35749   size_t size, i;
35750 
35751 #define kmem_slab_offset(x)                             \
35752     ((unsigned long)&((kmem_slab_t *)0)->x)
35753 #define kmem_slab_diff(a,b)                             \
35754     (kmem_slab_offset(a) - kmem_slab_offset(b))
35755 #define kmem_cache_offset(x)                            \
35756     ((unsigned long)&((kmem_cache_t *)0)->x)
35757 #define kmem_cache_diff(a,b)                            \
35758     (kmem_cache_offset(a) - kmem_cache_offset(b))
35759 
35760   /* Sanity checks... */
35761   if (kmem_cache_diff(c_firstp, c_magic) !=
35762       kmem_slab_diff(s_nextp, s_magic)   ||
35763       kmem_cache_diff(c_firstp, c_inuse) !=
35764       kmem_slab_diff(s_nextp, s_inuse)   ||
35765       ((kmem_cache_offset(c_lastp) -
35766         ((unsigned long)
35767          kmem_slab_end((kmem_cache_t*)NULL))) !=
35768        kmem_slab_offset(s_prevp))        ||
35769       kmem_cache_diff(c_lastp, c_firstp) !=
35770       kmem_slab_diff(s_prevp, s_nextp)) {
35771     /* Offsets to the magic are incorrect, either the
35772      * structures have been incorrectly changed, or
35773      * adjustments are needed for your architecture.  */
35774     panic("kmem_cache_init(): Offsets are wrong - "
35775           "I've been messed with!");
35776     /* NOTREACHED */
35777   }
35778 #undef  kmem_cache_offset
35779 #undef  kmem_cache_diff
35780 #undef  kmem_slab_offset
35781 #undef  kmem_slab_diff
35782 
35783   cache_chain_sem = MUTEX;
35784 
35785   size = cache_cache.c_offset + sizeof(kmem_bufctl_t);
35786   size += (L1_CACHE_BYTES-1);
35787   size &= ~(L1_CACHE_BYTES-1);
35788   cache_cache.c_offset = size-sizeof(kmem_bufctl_t);
35789 
35790   i = (PAGE_SIZE << cache_cache.c_gfporder) -
35791       slab_align_size;
35792   cache_cache.c_num = i / size; /* objs / slab */
35793 
35794   /* Cache colouring. */
35795   cache_cache.c_colour =
35796     (i-(cache_cache.c_num*size))/L1_CACHE_BYTES;
35797   cache_cache.c_colour_next = cache_cache.c_colour;
35798 
35799   /* Fragmentation resistance on low memory - only use
35800    * bigger page orders on machines with more than 32MB
35801    * of memory.  */
35802   if (num_physpages > (32 << 20) >> PAGE_SHIFT)
35803     slab_break_gfp_order = SLAB_BREAK_GFP_ORDER_HI;
35804   return start;
35805 }
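
Editor's note: the arithmetic above is easier to follow with concrete figures.
The sketch below redoes it for an assumed 4096-byte page, 32-byte cache lines,
a kmem_cache_t of about 100 bytes, a 4-byte kmem_bufctl_t and a 32-byte
slab_align_size; the real values depend on the architecture and configuration.

#include <stdio.h>

int main(void)
{
    /* Assumed figures, not the kernel's exact ones. */
    unsigned long page_size   = 4096;
    unsigned long l1_bytes    = 32;
    unsigned long cache_desc  = 100; /* ~sizeof(kmem_cache_t) */
    unsigned long bufctl_size = 4;   /* one 32-bit word */
    unsigned long slab_mgmt   = 32;  /* slab_align_size */
    unsigned long size, usable, num, colour;

    /* Object stride: descriptor plus its on-slab bufctl,
     * rounded up to a whole L1 cache line. */
    size = cache_desc + bufctl_size;
    size = (size + l1_bytes - 1) & ~(l1_bytes - 1);

    usable = page_size - slab_mgmt;
    num    = usable / size;
    colour = (usable - num*size) / l1_bytes;

    printf("stride %lu, %lu objs/slab, %lu colours\n",
           size, num, colour);
    /* -> stride 128, 31 objs/slab, 3 colours */
    return 0;
}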
35806 
35807 /* Initialisation - setup remaining internal and general
35808  * caches.  Called after the gfp() functions have been
35809  * enabled, and before smp_init().  */
35810 void __init kmem_cache_sizes_init(void)
35811 {
35812   unsigned int    found = 0;
35813 
35814   cache_slabp = kmem_cache_create("slab_cache",
35815     sizeof(kmem_slab_t), 0, SLAB_HWCACHE_ALIGN,
35816     NULL, NULL);
35817   if (cache_slabp) {
35818     char **names = cache_sizes_name;
35819     cache_sizes_t *sizes = cache_sizes;
35820     do {
35821       /* For performance, all the general caches are L1
35822        * aligned.  This should be particularly beneficial
35823        * on SMP boxes, as it eliminates "false sharing".
35824        * Note for systems short on memory removing the
35825        * alignment will allow tighter packing of the
35826        * smaller caches. */
35827       if (!(sizes->cs_cachep =
35828             kmem_cache_create(*names++, sizes->cs_size,
35829             0, SLAB_HWCACHE_ALIGN, NULL, NULL)))
35830         goto panic_time;
35831       if (!found) {
35832         /* Inc off-slab bufctl limit until the ceiling is
35833          * hit. */
35834         if (SLAB_BUFCTL(sizes->cs_cachep->c_flags))
35835           found++;
35836         else
35837           bufctl_limit =
35838             (sizes->cs_size/sizeof(kmem_bufctl_t));
35839       }
35840       sizes->cs_cachep->c_flags |= SLAB_CFLGS_GENERAL;
35841       sizes++;
35842     } while (sizes->cs_size);
35843 #if     SLAB_SELFTEST
35844     kmem_self_test();
35845 #endif  /* SLAB_SELFTEST */
35846     return;
35847   }
35848 panic_time:
35849   panic("kmem_cache_sizes_init: Error creating caches");
35850   /* NOTREACHED */
35851 }
35852 
35853 /* Interface to the system's page allocator.  *dma is
35854  * set non-zero if all of the memory is DMAable.  No
35855  * need to hold the cache-lock.  */
35856 static inline void *
35857 kmem_getpages(kmem_cache_t *cachep, unsigned long flags,
35858               unsigned int *dma)
35859 {
35860   void    *addr;
35861 
35862   *dma = flags & SLAB_DMA;
35863   addr =
35864     (void*) __get_free_pages(flags, cachep->c_gfporder);
35865   /* Assume that now we have the pages no one else can
35866    * legally mess with the 'struct page's.  However
35867    * vm_scan() might try to test the structure to see if
35868    * it is a named-page or buffer-page.  The members it
35869    * tests are of no interest here.....  */
35870   if (!*dma && addr) {
35871     /* Need to check if can dma. */
35872     struct page *page = mem_map + MAP_NR(addr);
35873     *dma = 1 << cachep->c_gfporder;
35874     while ((*dma)--) {
35875       if (!PageDMA(page)) {
35876         *dma = 0;
35877         break;
35878       }
35879       page++;
35880     }
35881   }
35882   return addr;
35883 }
35884 
35885 /* Interface to system's page release. */
35886 static inline void
35887 kmem_freepages(kmem_cache_t *cachep, void *addr)
35888 {
35889   unsigned long i = (1<<cachep->c_gfporder);
35890   struct page *page = &mem_map[MAP_NR(addr)];
35891 
35892   /* free_pages() does not clear the type bit - we do
35893    * that.  The pages have been unlinked from their
35894    * cache-slab, but their 'struct page's might be
35895    * accessed in vm_scan(). Shouldn't be a worry.  */
35896   while (i--) {
35897     PageClearSlab(page);
35898     page++;
35899   }
35900   free_pages((unsigned long)addr, cachep->c_gfporder);
35901 }
35902 
35903 #if     SLAB_DEBUG_SUPPORT
35904 static inline void
35905 kmem_poison_obj(kmem_cache_t *cachep, void *addr)
35906 {
35907   memset(addr, SLAB_POISON_BYTE, cachep->c_org_size);
35908   *(unsigned char *)(addr+cachep->c_org_size-1) =
35909     SLAB_POISON_END;
35910 }
35911 
35912 static inline int
35913 kmem_check_poison_obj(kmem_cache_t *cachep, void *addr)
35914 {
35915   void *end;
35916   end = memchr(addr, SLAB_POISON_END,
35917                cachep->c_org_size);
35918   if (end != (addr+cachep->c_org_size-1))
35919     return 1;
35920   return 0;
35921 }
35922 #endif  /* SLAB_DEBUG_SUPPORT */
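
Editor's note: poisoning fills a free object with SLAB_POISON_BYTE and puts
SLAB_POISON_END in its last byte; a stray write is usually noticed later
because memchr() no longer finds the end byte in the final position.  A
stand-alone userspace sketch of the same check (the scribble value is chosen
so the check fires):

#include <stdio.h>
#include <string.h>

#define POISON_BYTE 0x5a
#define POISON_END  0xa5

static void poison_obj(void *addr, size_t size)
{
    memset(addr, POISON_BYTE, size);
    ((unsigned char *)addr)[size - 1] = POISON_END;
}

/* Returns 1 if the "free" object looks modified. */
static int check_poison_obj(void *addr, size_t size)
{
    void *end = memchr(addr, POISON_END, size);
    return end != (void *)((unsigned char *)addr
                           + size - 1);
}

int main(void)
{
    unsigned char obj[64];

    poison_obj(obj, sizeof(obj));
    printf("untouched: %d\n",
           check_poison_obj(obj, sizeof(obj)));  /* 0 */

    obj[10] = POISON_END;  /* use-after-free scribble */
    printf("scribbled: %d\n",
           check_poison_obj(obj, sizeof(obj)));  /* 1 */
    return 0;
}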
35923 
35924 /* Three slab chain funcs - all called with ints disabled
35925  * and the appropriate cache-lock held.  */
35926 static inline void
35927 kmem_slab_unlink(kmem_slab_t *slabp)
35928 {
35929   kmem_slab_t *prevp = slabp->s_prevp;
35930   kmem_slab_t *nextp = slabp->s_nextp;
35931   prevp->s_nextp = nextp;
35932   nextp->s_prevp = prevp;
35933 }
35934 
35935 static inline void
35936 kmem_slab_link_end(kmem_cache_t *cachep,
35937                    kmem_slab_t *slabp)
35938 {
35939   kmem_slab_t *lastp = cachep->c_lastp;
35940   slabp->s_nextp  = kmem_slab_end(cachep);
35941   slabp->s_prevp  = lastp;
35942   cachep->c_lastp = slabp;
35943   lastp->s_nextp  = slabp;
35944 }
35945 
35946 static inline void
35947 kmem_slab_link_free(kmem_cache_t *cachep,
35948                     kmem_slab_t *slabp)
35949 {
35950   kmem_slab_t *nextp = cachep->c_freep;
35951   kmem_slab_t *prevp = nextp->s_prevp;
35952   slabp->s_nextp = nextp;
35953   slabp->s_prevp = prevp;
35954   nextp->s_prevp = slabp;
35955   slabp->s_prevp->s_nextp = slabp;
35956 }
35957 
35958 /* Destroy all the objs in a slab, and release the mem
35959  * back to the system.  Before calling the slab must have
35960  * been unlinked from the cache.  The cache-lock is not
35961  * held/needed.  */
35962 static void
35963 kmem_slab_destroy(kmem_cache_t *cachep,
35964                   kmem_slab_t *slabp)
35965 {
35966   if (cachep->c_dtor
35967 #if     SLAB_DEBUG_SUPPORT
35968     || cachep->c_flags & (SLAB_POISON | SLAB_RED_ZONE)
35969 #endif  /*SLAB_DEBUG_SUPPORT*/
35970   ) {
35971     /* Doesn't use the bufctl ptrs to find objs. */
35972     unsigned long num = cachep->c_num;
35973     void *objp = slabp->s_mem;
35974     do {
35975 #if     SLAB_DEBUG_SUPPORT
35976       if (cachep->c_flags & SLAB_RED_ZONE) {
35977         if (*((unsigned long*)(objp)) != SLAB_RED_MAGIC1)
35978           printk(KERN_ERR "kmem_slab_destroy: "
35979                  "Bad front redzone - %s\n",
35980                  cachep->c_name);
35981         objp += BYTES_PER_WORD;
35982         if (*((unsigned long*)(objp+cachep->c_org_size))
35983             != SLAB_RED_MAGIC1)
35984           printk(KERN_ERR "kmem_slab_destroy: "
35985                  "Bad rear redzone - %s\n",
35986                  cachep->c_name);
35987       }
35988       if (cachep->c_dtor)
35989 #endif  /*SLAB_DEBUG_SUPPORT*/
35990         (cachep->c_dtor)(objp, cachep, 0);
35991 #if     SLAB_DEBUG_SUPPORT
35992       else if (cachep->c_flags & SLAB_POISON) {
35993         if (kmem_check_poison_obj(cachep, objp))
35994           printk(KERN_ERR "kmem_slab_destroy: "
35995                  "Bad poison - %s\n", cachep->c_name);
35996       }
35997       if (cachep->c_flags & SLAB_RED_ZONE)
35998         objp -= BYTES_PER_WORD;
35999 #endif  /* SLAB_DEBUG_SUPPORT */
36000       objp += cachep->c_offset;
36001       if (!slabp->s_index)
36002         objp += sizeof(kmem_bufctl_t);
36003     } while (--num);
36004   }
36005 
36006   slabp->s_magic = SLAB_MAGIC_DESTROYED;
36007   if (slabp->s_index)
36008     kmem_cache_free(cachep->c_index_cachep,
36009                     slabp->s_index);
36010   kmem_freepages(cachep, slabp->s_mem-slabp->s_offset);
36011   if (SLAB_OFF_SLAB(cachep->c_flags))
36012     kmem_cache_free(cache_slabp, slabp);
36013 }
36014 
36015 /* Calculate the num objs, wastage, and bytes left
36016  * over for a given slab size. */
36017 static inline size_t
36018 kmem_cache_cal_waste(unsigned long gfporder, size_t size,
36019   size_t extra, unsigned long flags, size_t *left_over,
36020   unsigned long *num)
36021 {
36022   size_t wastage = PAGE_SIZE<<gfporder;
36023 
36024   if (SLAB_OFF_SLAB(flags))
36025     gfporder = 0;
36026   else
36027     gfporder = slab_align_size;
36028   wastage -= gfporder;
36029   *num = wastage / size;
36030   wastage -= (*num * size);
36031   *left_over = wastage;
36032 
36033   return (wastage + gfporder + (extra * *num));
36034 }
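
Editor's note: a concrete pass through the calculation above, for an assumed
4096-byte page at gfporder 0 with on-slab management (slab_align_size 32) and
a 100-byte object carrying a 4-byte on-slab bufctl (so size 104, extra 4).
kmem_cache_create() uses the returned figure to decide whether the internal
fragmentation for a given page order is acceptable.

#include <stdio.h>

int main(void)
{
    /* Assumed figures, mirroring the on-slab branch of
     * kmem_cache_cal_waste(). */
    unsigned long page  = 4096;  /* PAGE_SIZE<<gfporder */
    unsigned long mgmt  = 32;    /* slab_align_size     */
    unsigned long size  = 104;   /* 100B obj + bufctl   */
    unsigned long extra = 4;     /* sizeof(kmem_bufctl_t) */
    unsigned long wastage, num, left, waste;

    wastage = page - mgmt;
    num     = wastage / size;        /* 39 objs        */
    left    = wastage - num*size;    /*  8 bytes spare */
    waste   = left + mgmt + extra*num;

    printf("%lu objs, %lu left over, %lu overhead\n",
           num, left, waste);
    /* -> 39 objs, 8 left over, 196 overhead */
    return 0;
}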
36035 
36036 /* Create a cache: Returns a ptr to the cache on success,
36037  * NULL on failure.  Cannot be called within a int, but
36038  * can be interrupted.  NOTE: The 'name' is assumed to be
36039  * memory that is _not_ going to disappear.  */
36040 kmem_cache_t *
36041 kmem_cache_create(const char *name, size_t size,
36042   size_t offset, unsigned long flags,
36043   void (*ctor)(void*, kmem_cache_t *, unsigned long),
36044   void (*dtor)(void*, kmem_cache_t *, unsigned long))
36045 {
36046   const char *func_nm= KERN_ERR "kmem_create: ";
36047   kmem_cache_t    *searchp;
36048   kmem_cache_t    *cachep=NULL;
36049   size_t          extra;
36050   size_t          left_over;
36051   size_t          align;
36052 
36053   /* Sanity checks... */
36054 #if     SLAB_MGMT_CHECKS
36055   if (!name) {
36056     printk("%sNULL ptr\n", func_nm);
36057     goto opps;
36058   }
36059   if (in_interrupt()) {
36060     printk("%sCalled during int - %s\n", func_nm, name);
36061     goto opps;
36062   }
36063 
36064   if (size < BYTES_PER_WORD) {
36065     printk("%sSize too small %d - %s\n",
36066            func_nm, (int) size, name);
36067     size = BYTES_PER_WORD;
36068   }
36069 
36070   if (size > ((1<<SLAB_OBJ_MAX_ORDER)*PAGE_SIZE)) {
36071     printk("%sSize too large %d - %s\n",
36072            func_nm, (int) size, name);
36073     goto opps;
36074   }
36075 
36076   if (dtor && !ctor) {
36077     /* Decon, but no con - doesn't make sense */
36078     printk("%sDecon but no con - %s\n", func_nm, name);
36079     goto opps;
36080   }
36081 
36082   if (offset < 0 || offset > size) {
36083     printk("%sOffset weird %d - %s\n",
36084            func_nm, (int) offset, name);
36085     offset = 0;
36086   }
36087 
36088 #if     SLAB_DEBUG_SUPPORT
36089   if ((flags & SLAB_DEBUG_INITIAL) && !ctor) {
36090     /* No ctor, but initial state check requested */
36091     printk("%sNo con, but init state check requested - "
36092            "%s\n", func_nm, name);
36093     flags &= ~SLAB_DEBUG_INITIAL;
36094   }
36095 
36096   if ((flags & SLAB_POISON) && ctor) {
36097     /* request for poisoning, but we can't do that with a
36098      * constructor */
36099     printk("%sPoisoning requested, but con given - %s\n",
36100            func_nm, name);
36101     flags &= ~SLAB_POISON;
36102   }
36103 #if     0
36104   if ((flags & SLAB_HIGH_PACK) && ctor) {
36105     printk("%sHigh pack requested, but con given - %s\n",
36106            func_nm, name);
36107     flags &= ~SLAB_HIGH_PACK;
36108   }
36109   if ((flags & SLAB_HIGH_PACK) &&
36110       (flags & (SLAB_POISON|SLAB_RED_ZONE))) {
36111     printk("%sHigh pack requested, but with "
36112            "poisoning/red-zoning - %s\n",
36113            func_nm, name);
36114     flags &= ~SLAB_HIGH_PACK;
36115   }
36116 #endif
36117 #endif  /* SLAB_DEBUG_SUPPORT */
36118 #endif  /* SLAB_MGMT_CHECKS */
36119 
36120   /* Always checks flags, a caller might be expecting
36121    * debug support which isn't available.  */
36122   if (flags & ~SLAB_C_MASK) {
36123     printk("%sIllgl flg %lX - %s\n",
36124            func_nm, flags, name);
36125     flags &= SLAB_C_MASK;
36126   }
36127 
36128   /* Get cache's description obj. */
36129   cachep =
36130     (kmem_cache_t *) kmem_cache_alloc(&cache_cache,
36131                                       SLAB_KERNEL);
36132   if (!cachep)
36133     goto opps;
36134   memset(cachep, 0, sizeof(kmem_cache_t));
36135 
36136   /* Check that size is in terms of words.  This is
36137    * needed to avoid unaligned accesses for some archs
36138    * when redzoning is used, and makes sure any on-slab
36139    * bufctl's are also correctly aligned.  */
36140   if (size & (BYTES_PER_WORD-1)) {
36141     size += (BYTES_PER_WORD-1);
36142     size &= ~(BYTES_PER_WORD-1);
36143     printk("%sForcing size word alignment - %s\n",
36144            func_nm, name);
36145   }
36146 
36147   cachep->c_org_size = size;
36148 #if     SLAB_DEBUG_SUPPORT
36149   if (flags & SLAB_RED_ZONE) {
36150     /* There is no point trying to honour cache alignment
36151      * when redzoning. */
36152     flags &= ~SLAB_HWCACHE_ALIGN;
36153     size += 2*BYTES_PER_WORD;   /* words for redzone */
36154   }
36155 #endif  /* SLAB_DEBUG_SUPPORT */
36156 
36157   align = BYTES_PER_WORD;
36158   if (flags & SLAB_HWCACHE_ALIGN)
36159     align = L1_CACHE_BYTES;
36160 
36161   /* Determine if the slab management and/or bufctls are
36162    * 'on' or 'off' slab. */
36163   extra = sizeof(kmem_bufctl_t);
36164   if (size < (PAGE_SIZE>>3)) {
36165     /* Size is small(ish).  Use packing where bufctl size
36166      * per obj is low, and slab management is on-slab. */
36167 #if     0
36168     if ((flags & SLAB_HIGH_PACK)) {
36169       /* Special high packing for small objects (mainly
36170        * for vm_mapping structs, but others can use it).
36171        */
36172       if (size == (L1_CACHE_BYTES/4) ||
36173           size == (L1_CACHE_BYTES/2) ||
36174           size == L1_CACHE_BYTES) {
36175         /* The bufctl is stored with the object. */
36176         extra = 0;
36177       } else
36178         flags &= ~SLAB_HIGH_PACK;
36179     }
36180 #endif
36181   } else {
36182     /* Size is large, assume best to place the slab
36183      * management obj off-slab (should allow better
36184      * packing of objs).  */
36185     flags |= SLAB_CFLGS_OFF_SLAB;
36186     if (!(size & ~PAGE_MASK)  || size == (PAGE_SIZE/2) ||
36187         size == (PAGE_SIZE/4) || size == (PAGE_SIZE/8)) {
36188       /* To avoid waste the bufctls are off-slab... */
36189       flags |= SLAB_CFLGS_BUFCTL;
36190       extra = 0;
36191     } /* else slab management is off-slab, but freelist
36192        * pointers are on. */
36193   }
36194   size += extra;
36195 
36196   if (flags & SLAB_HWCACHE_ALIGN) {
36197     /* Need to adjust size so that objs are cache
36198      * aligned. */
36199     if (size > (L1_CACHE_BYTES/2)) {
36200       size_t words = size % L1_CACHE_BYTES;
36201       if (words)
36202         size += (L1_CACHE_BYTES-words);
36203     } else {
36204       /* Small obj size, can get at least two per cache
36205        * line. */
36206       int num_per_line = L1_CACHE_BYTES/size;
36207       left_over = L1_CACHE_BYTES - (num_per_line*size);
36208       if (left_over) {
36209         /* Need to adjust size so objs cache align. */
36210         if (left_over%num_per_line) {
36211           /* Odd num of objs per line - fixup. */
36212           num_per_line--;
36213           left_over += size;
36214         }
36215         size += (left_over/num_per_line);
36216       }
36217     }
36218   } else if (!(size%L1_CACHE_BYTES)) {
36219     /* Size happens to cache align... */
36220     flags |= SLAB_HWCACHE_ALIGN;
36221     align = L1_CACHE_BYTES;
36222   }
36223 
36224   /* Cal size (in pages) of slabs, and the num of objs
36225    * per slab.  This could be made much more intelligent.
36226    * For now, try to avoid using high page-orders for
36227    * slabs.  When the gfp() funcs are more friendly
36228    * towards high-order requests, this should be changed.
36229    */
36230   do {
36231     size_t wastage;
36232     unsigned int break_flag = 0;
36233 cal_wastage:
36234     wastage = kmem_cache_cal_waste(cachep->c_gfporder,
36235          size, extra, flags, &left_over, &cachep->c_num);
36236     if (!cachep->c_num)
36237       goto next;
36238     if (break_flag)
36239       break;
36240     if (SLAB_BUFCTL(flags) &&
36241         cachep->c_num > bufctl_limit) {
36242       /* Oops, this num of objs will cause problems. */
36243       cachep->c_gfporder--;
36244       break_flag++;
36245       goto cal_wastage;
36246     }
36247     if (cachep->c_gfporder == SLAB_MAX_GFP_ORDER)
36248       break;
36249 
36250     /* Large num of objs is good, but v. large slabs are
36251      * currently bad for the gfp()s.  */
36252     if (cachep->c_num <= SLAB_MIN_OBJS_PER_SLAB) {
36253       if (cachep->c_gfporder < slab_break_gfp_order)
36254         goto next;
36255     }
36256 
36257     /* Stop caches with small objs having a large num of
36258      * pages. */
36259     if (left_over <= slab_align_size)
36260       break;
36261     if ((wastage*8) <= (PAGE_SIZE<<cachep->c_gfporder))
36262       break;  /* Acceptable internal fragmentation. */
36263 next:
36264     cachep->c_gfporder++;
36265   } while (1);
36266 
36267   /* If the slab has been placed off-slab, and we have
36268    * enough space then move it on-slab.  This is at the
36269    * expense of any extra colouring.  */
36270   if ((flags & SLAB_CFLGS_OFF_SLAB) &&
36271       !SLAB_BUFCTL(flags) &&
36272       left_over >= slab_align_size) {
36273     flags &= ~SLAB_CFLGS_OFF_SLAB;
36274     left_over -= slab_align_size;
36275   }
36276 
36277   /* Offset must be a multiple of the alignment. */
36278   offset += (align-1);
36279   offset &= ~(align-1);
36280 
36281   /* Mess around with the offset alignment. */
36282   if (!left_over) {
36283     offset = 0;
36284   } else if (left_over < offset) {
36285     offset = align;
36286     if (flags & SLAB_HWCACHE_ALIGN) {
36287       if (left_over < offset)
36288         offset = 0;
36289     } else {
36290       /* Offset is BYTES_PER_WORD, and left_over is at
36291        * least BYTES_PER_WORD.
36292        */
36293       if (left_over >= (BYTES_PER_WORD*2)) {
36294         offset >>= 1;
36295         if (left_over >= (BYTES_PER_WORD*4))
36296           offset >>= 1;
36297       }
36298     }
36299   } else if (!offset) {
36300     /* No offset requested, but space enough - give
36301      * one. */
36302     offset = left_over/align;
36303     if (flags & SLAB_HWCACHE_ALIGN) {
36304       if (offset >= 8) {
36305         /* A large number of colours - use a larger
36306          * alignment. */
36307         align <<= 1;
36308       }
36309     } else {
36310       if (offset >= 10) {
36311         align <<= 1;
36312         if (offset >= 16)
36313           align <<= 1;
36314       }
36315     }
36316     offset = align;
36317   }
36318 
36319 #if     0
36320 printk("%s: Left_over:%d Align:%d Size:%d\n",
36321        name, left_over, offset, size);
36322 #endif
36323 
36324   if ((cachep->c_align = (unsigned long) offset))
36325     cachep->c_colour = (left_over/offset);
36326   cachep->c_colour_next = cachep->c_colour;
36327 
36328   /* If the bufctl's are on-slab, c_offset does not
36329    * include the size of bufctl. */
36330   if (!SLAB_BUFCTL(flags))
36331     size -= sizeof(kmem_bufctl_t);
36332   else
36333     cachep->c_index_cachep =
36334       kmem_find_general_cachep(cachep->c_num *
36335                                sizeof(kmem_bufctl_t));
36336   cachep->c_offset = (unsigned long) size;
36337   cachep->c_freep = kmem_slab_end(cachep);
36338   cachep->c_firstp = kmem_slab_end(cachep);
36339   cachep->c_lastp = kmem_slab_end(cachep);
36340   cachep->c_flags = flags;
36341   cachep->c_ctor = ctor;
36342   cachep->c_dtor = dtor;
36343   cachep->c_magic = SLAB_C_MAGIC;
36344   cachep->c_name = name;  /* Simply point to the name. */
36345   spin_lock_init(&cachep->c_spinlock);
36346 
36347   /* Need the semaphore to access the chain. */
36348   down(&cache_chain_sem);
36349   searchp = &cache_cache;
36350   do {
36351     /* The name field is constant - no lock needed. */
36352     if (!strcmp(searchp->c_name, name)) {
36353       printk("%sDup name - %s\n", func_nm, name);
36354       break;
36355     }
36356     searchp = searchp->c_nextp;
36357   } while (searchp != &cache_cache);
36358 
36359   /* There is no reason to lock our new cache before we
36360    * link it in - no one knows about it yet...
36361    */
36362   cachep->c_nextp = cache_cache.c_nextp;
36363   cache_cache.c_nextp = cachep;
36364   up(&cache_chain_sem);
36365 opps:
36366   return cachep;
36367 }
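
Editor's note: a typical client of this interface, sketched as a kernel-module
fragment for this (2.2-era) API.  struct my_obj, my_ctor and the my_*
functions are invented for illustration; error handling is minimal.

#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/slab.h>

struct my_obj {
    int           in_use;
    unsigned long data[8];
};

static kmem_cache_t *my_cachep;

/* Constructor: run once per object when a slab is grown,
 * not on every allocation.  Must be threaded, and must
 * not allocate from my_cachep itself. */
static void my_ctor(void *p, kmem_cache_t *cachep,
                    unsigned long flags)
{
    struct my_obj *obj = p;
    obj->in_use = 0;
}

int my_subsys_init(void)
{
    my_cachep = kmem_cache_create("my_obj_cache",
        sizeof(struct my_obj), 0, SLAB_HWCACHE_ALIGN,
        my_ctor, NULL);
    return my_cachep ? 0 : -ENOMEM;
}

struct my_obj *my_obj_get(void)
{
    return kmem_cache_alloc(my_cachep, SLAB_KERNEL);
}

void my_obj_put(struct my_obj *obj)
{
    kmem_cache_free(my_cachep, obj);
}

void my_subsys_exit(void)
{
    /* Caches are never destroyed (see the header
     * comment); an unloading module can only shrink. */
    if (kmem_cache_shrink(my_cachep))
        printk("my_obj_cache: not all slabs freed\n");
}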
36368 
36369 /* Shrink a cache.  Releases as many slabs as possible
36370  * for a cache.  It is expected this function will be
36371  * called by a module when it is unloaded.  The cache is
36372  * _not_ removed, this creates too many problems and the
36373  * cache-structure does not take up much room.  A module
36374  * should keep its cache pointer(s) in unloaded memory,
36375  * so when reloaded it knows the cache is available.  To
36376  * help debugging, a zero exit status indicates all slabs
36377  * were released.  */
36378 int
36379 kmem_cache_shrink(kmem_cache_t *cachep)
36380 {
36381   kmem_cache_t    *searchp;
36382   kmem_slab_t     *slabp;
36383   int     ret;
36384 
36385   if (!cachep) {
36386     printk(KERN_ERR "kmem_shrink: NULL ptr\n");
36387     return 2;
36388   }
36389   if (in_interrupt()) {
36390     printk(KERN_ERR "kmem_shrink: Called during int - "
36391            "%s\n", cachep->c_name);
36392     return 2;
36393   }
36394 
36395   /* Find the cache in the chain of caches. */
36396   down(&cache_chain_sem);      /* Semaphore is needed. */
36397   searchp = &cache_cache;
36398   for (;searchp->c_nextp != &cache_cache;
36399        searchp = searchp->c_nextp) {
36400     if (searchp->c_nextp != cachep)
36401       continue;
36402 
36403     /* Accessing clock_searchp is safe - we hold the
36404      * mutex. */
36405     if (cachep == clock_searchp)
36406       clock_searchp = cachep->c_nextp;
36407     goto found;
36408   }
36409   up(&cache_chain_sem);
36410   printk(KERN_ERR "kmem_shrink: Invalid cache addr %p\n",
36411          cachep);
36412   return 2;
36413 found:
36414   /* Release the semaphore before getting the cache-lock.
36415    * This could mean multiple engines are shrinking the
36416    * cache, but so what.  */
36417   up(&cache_chain_sem);
36418   spin_lock_irq(&cachep->c_spinlock);
36419 
36420   /* If the cache is growing, stop shrinking. */
36421   while (!cachep->c_growing) {
36422     slabp = cachep->c_lastp;
36423     if (slabp->s_inuse || slabp == kmem_slab_end(cachep))
36424       break;
36425     kmem_slab_unlink(slabp);
36426     spin_unlock_irq(&cachep->c_spinlock);
36427     kmem_slab_destroy(cachep, slabp);
36428     spin_lock_irq(&cachep->c_spinlock);
36429   }
36430   ret = 1;
36431   if (cachep->c_lastp == kmem_slab_end(cachep))
36432     ret--;          /* Cache is empty. */
36433   spin_unlock_irq(&cachep->c_spinlock);
36434   return ret;
36435 }
36436 
36437 /* Get the memory for a slab management obj. */
36438 static inline kmem_slab_t *
36439 kmem_cache_slabmgmt(kmem_cache_t *cachep, void *objp,
36440                     int local_flags)
36441 {
36442   kmem_slab_t     *slabp;
36443 
36444   if (SLAB_OFF_SLAB(cachep->c_flags)) {
36445     /* Slab management obj is off-slab. */
36446     slabp = kmem_cache_alloc(cache_slabp, local_flags);
36447   } else {
36448     /* Slab management at end of slab memory, placed so
36449      * that the position is 'coloured'.  */
36450     void *end;
36451     end = objp + (cachep->c_num * cachep->c_offset);
36452     if (!SLAB_BUFCTL(cachep->c_flags))
36453       end += (cachep->c_num * sizeof(kmem_bufctl_t));
36454     slabp =
36455       (kmem_slab_t *) L1_CACHE_ALIGN((unsigned long)end);
36456   }
36457 
36458   if (slabp) {
36459     slabp->s_inuse = 0;
36460     slabp->s_dma = 0;
36461     slabp->s_index = NULL;
36462   }
36463 
36464   return slabp;
36465 }
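
Editor's note: for the on-slab case the management structure lands just past
the last object (object stride plus its on-slab bufctl), rounded up to an L1
boundary, so the colour offset applied to the objects moves it as well.  A
small userspace sketch of that address arithmetic with invented figures:

#include <stdio.h>

#define ALIGN_UP(x, a) (((x) + (a) - 1) & ~((a) - 1UL))

int main(void)
{
    /* Assumed: one 4096-byte page, 3 objects with a
     * stride of 1280 bytes (c_offset plus an on-slab
     * bufctl), colour offset 64, 32-byte cache lines. */
    unsigned long base   = 0x10000;     /* page start  */
    unsigned long colour = 64;
    unsigned long objp   = base + colour;
    unsigned long end    = objp + 3*1280;
    unsigned long slabp  = ALIGN_UP(end, 32);

    printf("objs at %#lx..%#lx, slab mgmt at %#lx\n",
           objp, end, slabp);
    return 0;
}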
36466 
36467 static inline void
36468 kmem_cache_init_objs(kmem_cache_t * cachep,
36469                      kmem_slab_t * slabp, void *objp,
36470                      unsigned long ctor_flags)
36471 {
36472   kmem_bufctl_t   **bufpp = &slabp->s_freep;
36473   unsigned long   num = cachep->c_num-1;
36474 
36475   do {
36476 #if     SLAB_DEBUG_SUPPORT
36477     if (cachep->c_flags & SLAB_RED_ZONE) {
36478       *((unsigned long*)(objp)) = SLAB_RED_MAGIC1;
36479       objp += BYTES_PER_WORD;
36480       *((unsigned long*)(objp+cachep->c_org_size)) =
36481         SLAB_RED_MAGIC1;
36482     }
36483 #endif  /* SLAB_DEBUG_SUPPORT */
36484 
36485     /* Constructors are not allowed to allocate memory
36486      * from the same cache which they are a constructor
36487      * for.  Otherwise, deadlock.  They must also be
36488      * threaded.  */
36489     if (cachep->c_ctor)
36490       cachep->c_ctor(objp, cachep, ctor_flags);
36491 #if     SLAB_DEBUG_SUPPORT
36492     else if (cachep->c_flags & SLAB_POISON) {
36493       /* need to poison the objs */
36494       kmem_poison_obj(cachep, objp);
36495     }
36496 
36497     if (cachep->c_flags & SLAB_RED_ZONE) {
36498       if (*((unsigned long*)(objp+cachep->c_org_size)) !=
36499           SLAB_RED_MAGIC1) {
36500         *((unsigned long*)(objp+cachep->c_org_size)) =
36501           SLAB_RED_MAGIC1;
36502         printk(KERN_ERR
36503                "kmem_init_obj: Bad rear redzone "
36504                "after constructor - %s\n",
36505                cachep->c_name);
36506       }
36507       objp -= BYTES_PER_WORD;
36508       if (*((unsigned long*)(objp)) != SLAB_RED_MAGIC1) {
36509         *((unsigned long*)(objp)) = SLAB_RED_MAGIC1;
36510         printk(KERN_ERR
36511                "kmem_init_obj: Bad front redzone "
36512                "after constructor - %s\n",
36513                cachep->c_name);
36514       }
36515     }
36516 #endif  /* SLAB_DEBUG_SUPPORT */
36517 
36518     objp += cachep->c_offset;
36519     if (!slabp->s_index) {
36520       *bufpp = objp;
36521       objp += sizeof(kmem_bufctl_t);
36522     } else
36523       *bufpp = &slabp->s_index[num];
36524     bufpp = &(*bufpp)->buf_nextp;
36525   } while (num--);
36526 
36527   *bufpp = NULL;
36528 }
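
Editor's note: when the bufctls live in a separate s_index array (the
SLAB_CFLGS_BUFCTL case above), an object is located from its bufctl by array
position rather than by pointer arithmetic on the object itself:
objp = (bufp - s_index)*c_offset + s_mem, which is exactly what
__kmem_cache_alloc() does below.  A small userspace sketch with invented sizes:

#include <stdio.h>
#include <stdlib.h>

typedef struct bufctl { struct bufctl *next; } bufctl_t;

int main(void)
{
    /* Assumed: 8 objects with a 512-byte stride. */
    unsigned long num = 8, stride = 512;
    char     *s_mem   = malloc(num * stride);
    bufctl_t *s_index = calloc(num, sizeof(bufctl_t));
    bufctl_t *bufp;
    void     *objp;

    /* Pick the bufctl at index 5; its position in the
     * index alone identifies the object it manages. */
    bufp = &s_index[5];
    objp = (bufp - s_index) * stride + s_mem;

    printf("bufctl #%ld -> obj %p (s_mem %p)\n",
           (long)(bufp - s_index), objp, (void *)s_mem);
    free(s_mem);
    free(s_index);
    return 0;
}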
36529 
36530 /* Grow (by 1) the number of slabs within a cache.  This
36531  * is called by kmem_cache_alloc() when there are no
36532  * active objs left in a cache.  */
36533 static int
36534 kmem_cache_grow(kmem_cache_t * cachep, int flags)
36535 {
36536   kmem_slab_t     *slabp;
36537   struct page     *page;
36538   void            *objp;
36539   size_t           offset;
36540   unsigned int     dma, local_flags;
36541   unsigned long    ctor_flags;
36542   unsigned long    save_flags;
36543 
36544   /* Be lazy and only check for valid flags here, keeping
36545    * it out of the critical path in kmem_cache_alloc().
36546    */
36547   if (flags & ~(SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW)) {
36548     printk(KERN_WARNING "kmem_grow: Illegal flgs %X "
36549            "(correcting) - %s\n", flags, cachep->c_name);
36550     flags &= (SLAB_DMA|SLAB_LEVEL_MASK|SLAB_NO_GROW);
36551   }
36552 
36553   if (flags & SLAB_NO_GROW)
36554     return 0;
36555 
36556   /* The test for missing atomic flag is performed here,
36557    * rather than the more obvious place, simply to reduce
36558    * the critical path length in kmem_cache_alloc().  If
36559    * a caller is slightly mis-behaving they will
36560    * eventually be caught here (where it matters).  */
36561   if (in_interrupt() &&
36562       (flags & SLAB_LEVEL_MASK) != SLAB_ATOMIC) {
36563     printk(KERN_ERR "kmem_grow: Called nonatomically "
36564            "from int - %s\n", cachep->c_name);
36565     flags &= ~SLAB_LEVEL_MASK;
36566     flags |= SLAB_ATOMIC;
36567   }
36568   ctor_flags = SLAB_CTOR_CONSTRUCTOR;
36569   local_flags = (flags & SLAB_LEVEL_MASK);
36570   if (local_flags == SLAB_ATOMIC) {
36571     /* Not allowed to sleep.  Need to tell a constructor
36572      * about this - it might need to know...  */
36573     ctor_flags |= SLAB_CTOR_ATOMIC;
36574   }
36575 
36576   /* About to mess with non-constant members - lock. */
36577   spin_lock_irqsave(&cachep->c_spinlock, save_flags);
36578 
36579   /* Get colour for the slab, and cal the next value. */
36580   if (!(offset = cachep->c_colour_next--))
36581     cachep->c_colour_next = cachep->c_colour;
36582   offset *= cachep->c_align;
36583   cachep->c_dflags = SLAB_CFLGS_GROWN;
36584 
36585   cachep->c_growing++;
36586   spin_unlock_irqrestore(&cachep->c_spinlock,
36587                          save_flags);
36588 
36589   /* A series of memory allocations for a new slab.
36590    * Neither the cache-chain semaphore, or cache-lock,
36591    * are held, but the incrementing c_growing prevents
36592    * this cache from being reaped or shrunk.  Note:
36593    * The cache could be selected in for reaping in
36594    * kmem_cache_reap(), but when the final test is made
36595    * the growing value will be seen.  */
36596 
36597   /* Get mem for the objs. */
36598   if (!(objp = kmem_getpages(cachep, flags, &dma)))
36599     goto failed;
36600 
36601   /* Get slab management. */
36602   if (!(slabp = kmem_cache_slabmgmt(cachep,
36603                                     objp+offset,
36604                                     local_flags)))
36605     goto opps1;
36606   if (dma)
36607     slabp->s_dma = 1;
36608   if (SLAB_BUFCTL(cachep->c_flags)) {
36609     slabp->s_index =
36610       kmem_cache_alloc(cachep->c_index_cachep,
36611                        local_flags);
36612     if (!slabp->s_index)
36613       goto opps2;
36614   }
36615 
36616   /* Nasty!!!!!!  I hope this is OK. */
36617   dma = 1 << cachep->c_gfporder;
36618   page = &mem_map[MAP_NR(objp)];
36619   do {
36620     SLAB_SET_PAGE_CACHE(page, cachep);
36621     SLAB_SET_PAGE_SLAB(page, slabp);
36622     PageSetSlab(page);
36623     page++;
36624   } while (--dma);
36625 
36626   slabp->s_offset = offset;       /* It will fit... */
36627   objp += offset;         /* Address of first object. */
36628   slabp->s_mem = objp;
36629 
36630   /* For on-slab bufctls, c_offset is the distance
36631    * between the start of an obj and its related bufctl.
36632    * For off-slab bufctls, c_offset is the distance
36633    * between objs in the slab.  */
36634   kmem_cache_init_objs(cachep, slabp, objp, ctor_flags);
36635 
36636   spin_lock_irq(&cachep->c_spinlock);
36637 
36638   /* Make slab active. */
36639   slabp->s_magic = SLAB_MAGIC_ALLOC;
36640   kmem_slab_link_end(cachep, slabp);
36641   if (cachep->c_freep == kmem_slab_end(cachep))
36642     cachep->c_freep = slabp;
36643   SLAB_STATS_INC_GROWN(cachep);
36644   cachep->c_failures = 0;
36645   cachep->c_growing--;
36646 
36647   spin_unlock_irqrestore(&cachep->c_spinlock,
36648                          save_flags);
36649   return 1;
36650 opps2:
36651   if (SLAB_OFF_SLAB(cachep->c_flags))
36652     kmem_cache_free(cache_slabp, slabp);
36653 opps1:
36654   kmem_freepages(cachep, objp);
36655 failed:
36656   spin_lock_irq(&cachep->c_spinlock);
36657   cachep->c_growing--;
36658   spin_unlock_irqrestore(&cachep->c_spinlock,
36659                          save_flags);
36660   return 0;
36661 }
36662 
36663 static void
36664 kmem_report_alloc_err(const char *str,
36665                       kmem_cache_t * cachep)
36666 {
36667   if (cachep)
36668     SLAB_STATS_INC_ERR(cachep);     /* this is atomic */
36669   printk(KERN_ERR "kmem_alloc: %s (name=%s)\n",
36670          str, cachep ? cachep->c_name : "unknown");
36671 }
36672 
36673 static void
36674 kmem_report_free_err(const char *str, const void *objp,
36675                      kmem_cache_t * cachep)
36676 {
36677   if (cachep)
36678     SLAB_STATS_INC_ERR(cachep);
36679   printk(KERN_ERR "kmem_free: %s (objp=%p, name=%s)\n",
36680          str, objp, cachep ? cachep->c_name : "unknown");
36681 }
36682 
36683 /* Search for a slab whose objs are suitable for DMA.
36684  * Note: since testing the first free slab (in
36685  * __kmem_cache_alloc()), ints must not have been
36686  * enabled, or the cache-lock released!  */
36687 static inline kmem_slab_t *
36688 kmem_cache_search_dma(kmem_cache_t * cachep)
36689 {
36690   kmem_slab_t     *slabp = cachep->c_freep->s_nextp;
36691 
36692   for (; slabp != kmem_slab_end(cachep);
36693        slabp = slabp->s_nextp) {
36694     if (!(slabp->s_dma))
36695       continue;
36696     kmem_slab_unlink(slabp);
36697     kmem_slab_link_free(cachep, slabp);
36698     cachep->c_freep = slabp;
36699     break;
36700   }
36701   return slabp;
36702 }
36703 
36704 #if     SLAB_DEBUG_SUPPORT
36705 /* Perform extra freeing checks.  Currently, this check
36706  * is only for caches that use bufctl structures within
36707  * the slab.  Those which use bufctl's from the internal
36708  * cache have a reasonable check when the address is
36709  * searched for.  Called with the cache-lock held.  */
36710 static void *
36711 kmem_extra_free_checks(kmem_cache_t * cachep,
36712                        kmem_bufctl_t *search_bufp,
36713                        kmem_bufctl_t *bufp, void * objp)
36714 {
36715   if (SLAB_BUFCTL(cachep->c_flags))
36716     return objp;
36717 
36718   /* Check slab's freelist to see if this obj is
36719    * there. */
36720   for (; search_bufp;
36721        search_bufp = search_bufp->buf_nextp) {
36722     if (search_bufp != bufp)
36723       continue;
36724     return NULL;
36725   }
36726   return objp;
36727 }
36728 #endif  /* SLAB_DEBUG_SUPPORT */
36729 
36730 /* Called with cache lock held. */
36731 static inline void
36732 kmem_cache_full_free(kmem_cache_t *cachep,
36733                      kmem_slab_t *slabp)
36734 {
36735   if (slabp->s_nextp->s_inuse) {
36736     /* Not at correct position. */
36737     if (cachep->c_freep == slabp)
36738       cachep->c_freep = slabp->s_nextp;
36739     kmem_slab_unlink(slabp);
36740     kmem_slab_link_end(cachep, slabp);
36741   }
36742 }
36743 
36744 /* Called with cache lock held. */
36745 static inline void
36746 kmem_cache_one_free(kmem_cache_t *cachep,
36747                     kmem_slab_t *slabp)
36748 {
36749   if (slabp->s_nextp->s_inuse == cachep->c_num) {
36750     kmem_slab_unlink(slabp);
36751     kmem_slab_link_free(cachep, slabp);
36752   }
36753   cachep->c_freep = slabp;
36754 }
36755 
36756 /* Returns a ptr to an obj in the given cache. */
36757 static inline void *
36758 __kmem_cache_alloc(kmem_cache_t *cachep, int flags)
36759 {
36760   kmem_slab_t     *slabp;
36761   kmem_bufctl_t   *bufp;
36762   void            *objp;
36763   unsigned long   save_flags;
36764 
36765   /* Sanity check. */
36766   if (!cachep)
36767     goto nul_ptr;
36768   spin_lock_irqsave(&cachep->c_spinlock, save_flags);
36769 try_again:
36770   /* Get the slab the alloc is to come from. */
36771   slabp = cachep->c_freep;
36772 
36773   /* Magic is a sanity check _and_ says if we need a new
36774    * slab. */
36775   if (slabp->s_magic != SLAB_MAGIC_ALLOC)
36776     goto alloc_new_slab;
36777   /* DMA requests are 'rare' - keep out of the critical
36778    * path. */
36779   if (flags & SLAB_DMA)
36780     goto search_dma;
36781 try_again_dma:
36782   SLAB_STATS_INC_ALLOCED(cachep);
36783   SLAB_STATS_INC_ACTIVE(cachep);
36784   SLAB_STATS_SET_HIGH(cachep);
36785   slabp->s_inuse++;
36786   bufp = slabp->s_freep;
36787   slabp->s_freep = bufp->buf_nextp;
36788   if (slabp->s_freep) {
36789 ret_obj:
36790     if (!slabp->s_index) {
36791       bufp->buf_slabp = slabp;
36792       objp = ((void*)bufp) - cachep->c_offset;
36793 finished:
36794       /* The lock is not needed by the red-zone or poison
36795        * ops, and the obj has been removed from the slab.
36796        * Should be safe to drop the lock here.  */
36797       spin_unlock_irqrestore(&cachep->c_spinlock,
36798                              save_flags);
36799 #if     SLAB_DEBUG_SUPPORT
36800       if (cachep->c_flags & SLAB_RED_ZONE)
36801         goto red_zone;
36802 ret_red:
36803       if ((cachep->c_flags & SLAB_POISON) &&
36804           kmem_check_poison_obj(cachep, objp))
36805         kmem_report_alloc_err("Bad poison", cachep);
36806 #endif  /* SLAB_DEBUG_SUPPORT */
36807       return objp;
36808     }
36809     /* Update index ptr. */
36810     objp = ((bufp-slabp->s_index)*cachep->c_offset) +
36811            slabp->s_mem;
36812     bufp->buf_objp = objp;
36813     goto finished;
36814   }
36815   cachep->c_freep = slabp->s_nextp;
36816   goto ret_obj;
36817 
36818 #if     SLAB_DEBUG_SUPPORT
36819 red_zone:
36820   /* Set alloc red-zone, and check old one. */
36821   if (xchg((unsigned long *)objp, SLAB_RED_MAGIC2)
36822       != SLAB_RED_MAGIC1)
36823     kmem_report_alloc_err("Bad front redzone", cachep);
36824   objp += BYTES_PER_WORD;
36825   if (xchg((unsigned long *)(objp+cachep->c_org_size),
36826            SLAB_RED_MAGIC2) != SLAB_RED_MAGIC1)
36827     kmem_report_alloc_err("Bad rear redzone", cachep);
36828   goto ret_red;
36829 #endif  /* SLAB_DEBUG_SUPPORT */
36830 
36831 search_dma:
36832   if (slabp->s_dma ||
36833       (slabp = kmem_cache_search_dma(cachep)) !=
36834       kmem_slab_end(cachep))
36835     goto try_again_dma;
36836 alloc_new_slab:
36837   /* Either out of slabs, or magic number corruption. */
36838   if (slabp == kmem_slab_end(cachep)) {
36839     /* Need a new slab.  Release the lock before calling
36840      * kmem_cache_grow().  This allows objs to be
36841      * released back into the cache while growing.  */
36842     spin_unlock_irqrestore(&cachep->c_spinlock,
36843                            save_flags);
36844     if (kmem_cache_grow(cachep, flags)) {
36845       /* Someone may have stolen our objs.  Doesn't
36846        * matter, we'll just come back here again.  */
36847       spin_lock_irq(&cachep->c_spinlock);
36848       goto try_again;
36849     }
36850     /* Couldn't grow, but some objs may have been
36851      * freed. */
36852     spin_lock_irq(&cachep->c_spinlock);
36853     if (cachep->c_freep != kmem_slab_end(cachep)) {
36854       if ((flags & SLAB_ATOMIC) == 0)
36855         goto try_again;
36856     }
36857   } else {
36858     /* Very serious error - maybe panic() here? */
36859     kmem_report_alloc_err("Bad slab magic (corrupt)",
36860                           cachep);
36861   }
36862   spin_unlock_irqrestore(&cachep->c_spinlock,
36863                          save_flags);
36864 err_exit:
36865   return NULL;
36866 nul_ptr:
36867   kmem_report_alloc_err("NULL ptr", NULL);
36868   goto err_exit;
36869 }
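
The goto-threaded control flow above can be hard to follow on a first
reading.  The sketch below is a simplified restatement of the common
path only - no DMA search, no SLAB_DEBUG_SUPPORT, and no growing of the
cache - using the names from this listing.  It is for orientation, not
a replacement for the real function.

  /* Sketch only - not part of the original source. */
  static inline void *
  alloc_fast_path_sketch(kmem_cache_t *cachep, int flags)
  {
    kmem_slab_t     *slabp;
    kmem_bufctl_t   *bufp;
    void            *objp;
    unsigned long   save_flags;

    spin_lock_irqsave(&cachep->c_spinlock, save_flags);
    slabp = cachep->c_freep;          /* first slab with a free obj */
    if (slabp->s_magic != SLAB_MAGIC_ALLOC) {
      /* Out of slabs - the real code grows the cache and retries,
       * honouring SLAB_ATOMIC in 'flags'. */
      spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
      return NULL;
    }
    slabp->s_inuse++;
    bufp = slabp->s_freep;            /* pop the slab's freelist */
    slabp->s_freep = bufp->buf_nextp;
    if (!slabp->s_freep)              /* slab is now full - advance */
      cachep->c_freep = slabp->s_nextp;
    if (!slabp->s_index) {            /* bufctl lives inside the obj */
      bufp->buf_slabp = slabp;
      objp = ((void*)bufp) - cachep->c_offset;
    } else {                          /* off-slab bufctl index */
      objp = ((bufp-slabp->s_index)*cachep->c_offset) +
             slabp->s_mem;
      bufp->buf_objp = objp;
    }
    spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
    return objp;
  }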
36870 
36871 /* Release an obj back to its cache.  If the obj has a
36872  * constructed state, it should be in this state _before_
36873  * it is released.  */
36874 static inline void
36875 __kmem_cache_free(kmem_cache_t *cachep, const void *objp)
36876 {
36877   kmem_slab_t     *slabp;
36878   kmem_bufctl_t   *bufp;
36879   unsigned long   save_flags;
36880 
36881   /* Basic sanity checks. */
36882   if (!cachep || !objp)
36883     goto null_addr;
36884 
36885 #if     SLAB_DEBUG_SUPPORT
36886   /* A verify func is called without the cache-lock
36887    * held. */
36888   if (cachep->c_flags & SLAB_DEBUG_INITIAL)
36889     goto init_state_check;
36890 finished_initial:
36891 
36892   if (cachep->c_flags & SLAB_RED_ZONE)
36893     goto red_zone;
36894 return_red:
36895 #endif  /* SLAB_DEBUG_SUPPORT */
36896 
36897   spin_lock_irqsave(&cachep->c_spinlock, save_flags);
36898 
36899   if (SLAB_BUFCTL(cachep->c_flags))
36900     goto bufctl;
36901   bufp = (kmem_bufctl_t *)(objp+cachep->c_offset);
36902 
36903   /* Get slab for the object. */
36904 #if     0
36905   /* _NASTY_IF/ELSE_, but avoids a 'distant' memory ref
36906    * for some objects.  Is this worth while? XXX */
36907   if (cachep->c_flags & SLAB_HIGH_PACK)
36908     slabp = SLAB_GET_PAGE_SLAB(&mem_map[MAP_NR(bufp)]);
36909   else
36910 #endif
36911     slabp = bufp->buf_slabp;
36912 
36913 check_magic:
36914   /* Sanity check. */
36915   if (slabp->s_magic != SLAB_MAGIC_ALLOC)
36916     goto bad_slab;
36917 
36918 #if     SLAB_DEBUG_SUPPORT
36919   if (cachep->c_flags & SLAB_DEBUG_FREE)
36920     goto extra_checks;
36921 passed_extra:
36922 #endif  /* SLAB_DEBUG_SUPPORT */
36923 
36924   if (slabp->s_inuse) {           /* Sanity check. */
36925     SLAB_STATS_DEC_ACTIVE(cachep);
36926     slabp->s_inuse--;
36927     bufp->buf_nextp = slabp->s_freep;
36928     slabp->s_freep = bufp;
36929     if (bufp->buf_nextp) {
36930       if (slabp->s_inuse) {
36931         /* (hopefully) The most common case. */
36932 finished:
36933 #if     SLAB_DEBUG_SUPPORT
36934         if (cachep->c_flags & SLAB_POISON) {
36935           if (cachep->c_flags & SLAB_RED_ZONE)
36936             objp += BYTES_PER_WORD;
36937           kmem_poison_obj(cachep, objp);
36938         }
36939 #endif  /* SLAB_DEBUG_SUPPORT */
36940         spin_unlock_irqrestore(&cachep->c_spinlock,
36941                                save_flags);
36942         return;
36943       }
36944       kmem_cache_full_free(cachep, slabp);
36945       goto finished;
36946     }
36947     kmem_cache_one_free(cachep, slabp);
36948     goto finished;
36949   }
36950 
36951   /* Don't add to freelist. */
36952   spin_unlock_irqrestore(&cachep->c_spinlock,
36953                          save_flags);
36954   kmem_report_free_err("free with no active objs",
36955                        objp, cachep);
36956   return;
36957 bufctl:
36958   /* No 'extra' checks are performed for objs stored this
36959    * way; finding the obj is check enough.  */
36960   slabp = SLAB_GET_PAGE_SLAB(&mem_map[MAP_NR(objp)]);
36961   bufp =  &slabp->s_index[(objp - slabp->s_mem) /
36962                           cachep->c_offset];
36963   if (bufp->buf_objp == objp)
36964     goto check_magic;
36965   spin_unlock_irqrestore(&cachep->c_spinlock,
36966                          save_flags);
36967   kmem_report_free_err("Either bad obj addr or double "
36968                        "free", objp, cachep);
36969   return;
36970 #if     SLAB_DEBUG_SUPPORT
36971 init_state_check:
36972   /* Need to call the slab's constructor so the caller
36973    * can perform a verify of its state (debugging).  */
36974   cachep->c_ctor(objp, cachep,
36975                  SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY);
36976   goto finished_initial;
36977 extra_checks:
36978   if (!kmem_extra_free_checks(cachep, slabp->s_freep,
36979                               bufp, objp)) {
36980     spin_unlock_irqrestore(&cachep->c_spinlock,
36981                            save_flags);
36982     kmem_report_free_err("Double free detected during "
36983                          "checks", objp, cachep);
36984     return;
36985   }
36986   goto passed_extra;
36987 red_zone:
36988   /* We do not hold the cache-lock while checking the
36989    * red-zone.  */
36990   objp -= BYTES_PER_WORD;
36991   if (xchg((unsigned long *)objp, SLAB_RED_MAGIC1) !=
36992       SLAB_RED_MAGIC2) {
36993     /* Either write before start of obj, or a double
36994      * free. */
36995     kmem_report_free_err("Bad front redzone", objp,
36996                          cachep);
36997   }
36998   if (xchg((unsigned long *)
36999            (objp+cachep->c_org_size+BYTES_PER_WORD),
37000            SLAB_RED_MAGIC1) != SLAB_RED_MAGIC2) {
37001     /* Either write past end of obj, or a double free. */
37002     kmem_report_free_err("Bad rear redzone",
37003                          objp, cachep);
37004   }
37005   goto return_red;
37006 #endif  /* SLAB_DEBUG_SUPPORT */
37007 
37008 bad_slab:
37009   /* Slab doesn't contain the correct magic num. */
37010   if (slabp->s_magic == SLAB_MAGIC_DESTROYED) {
37011     /* Magic num says this is a destroyed slab. */
37012     kmem_report_free_err("free from inactive slab",
37013                          objp, cachep);
37014   } else
37015     kmem_report_free_err("Bad obj addr", objp, cachep);
37016   spin_unlock_irqrestore(&cachep->c_spinlock,
37017                          save_flags);
37018 
37019 #if 1
37020 /* FORCE A KERNEL DUMP WHEN THIS HAPPENS. SPEAK IN ALL
37021  * CAPS. GET THE CALL CHAIN. */
37022 *(int *) 0 = 0;
37023 #endif
37024 
37025   return;
37026 null_addr:
37027   kmem_report_free_err("NULL ptr", objp, cachep);
37028   return;
37029 }
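
As with allocation, the common free path is easier to see with the
debug, bufctl-index and error branches unwound.  A sketch under the
same caveats as before (in-slab bufctl, no SLAB_DEBUG_SUPPORT):

  /* Sketch only - not part of the original source. */
  static inline void
  free_fast_path_sketch(kmem_cache_t *cachep, void *objp)
  {
    kmem_slab_t     *slabp;
    kmem_bufctl_t   *bufp;
    unsigned long   save_flags;

    spin_lock_irqsave(&cachep->c_spinlock, save_flags);
    bufp  = (kmem_bufctl_t *)(objp + cachep->c_offset);
    slabp = bufp->buf_slabp;          /* slab that owns the obj */

    SLAB_STATS_DEC_ACTIVE(cachep);
    slabp->s_inuse--;
    bufp->buf_nextp = slabp->s_freep; /* push onto slab freelist */
    slabp->s_freep = bufp;

    if (!bufp->buf_nextp)             /* slab was full: it becomes */
      kmem_cache_one_free(cachep, slabp);     /* the new c_freep   */
    else if (!slabp->s_inuse)         /* slab is now empty: move   */
      kmem_cache_full_free(cachep, slabp);    /* it to the tail    */

    spin_unlock_irqrestore(&cachep->c_spinlock, save_flags);
  }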
37030 
37031 void *
37032 kmem_cache_alloc(kmem_cache_t *cachep, int flags)
37033 {
37034   return __kmem_cache_alloc(cachep, flags);
37035 }
37036 
37037 void
37038 kmem_cache_free(kmem_cache_t *cachep, void *objp)
37039 {
37040   __kmem_cache_free(cachep, objp);
37041 }
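
Together with kmem_cache_create() and kmem_cache_shrink(), these two
exported wrappers are the whole interface a subsystem sees.  A minimal
usage sketch following the 2.2-era prototypes used in this listing; the
cache name, object type and init routine are hypothetical:

  #include <linux/slab.h>
  #include <linux/errno.h>

  struct my_obj { int a, b; };        /* hypothetical client type */
  static kmem_cache_t *my_cachep;

  int my_subsystem_init(void)
  {
    /* name, size, offset, flags, ctor, dtor - the same argument
     * order as the self-test near the end of this file. */
    my_cachep = kmem_cache_create("my_obj_cache",
                                  sizeof(struct my_obj),
                                  0, 0, NULL, NULL);
    return my_cachep ? 0 : -ENOMEM;
  }

  void my_subsystem_work(void)
  {
    struct my_obj *p = kmem_cache_alloc(my_cachep, SLAB_KERNEL);

    if (!p)
      return;
    p->a = p->b = 0;
    kmem_cache_free(my_cachep, p);
  }

When the subsystem is finished with the cache (eg. on module unload),
kmem_cache_shrink() - used the same way by the self-test below - frees
the cache's unused slabs.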
37042 
37043 void *
37044 kmalloc(size_t size, int flags)
37045 {
37046   cache_sizes_t   *csizep = cache_sizes;
37047 
37048   for (; csizep->cs_size; csizep++) {
37049     if (size > csizep->cs_size)
37050       continue;
37051     return __kmem_cache_alloc(csizep->cs_cachep, flags);
37052   }
37053   printk(KERN_ERR "kmalloc: Size (%lu) too large\n",
37054          (unsigned long) size);
37055   return NULL;
37056 }
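
kmalloc() simply walks the cache_sizes[] table until it reaches the
first general cache whose objects are at least 'size' bytes and
allocates from it; oversized requests fail with the printk above.  A
minimal usage sketch (the 300-byte size and the GFP_KERNEL priority are
illustrative assumptions, not taken from this listing):

  #include <linux/slab.h>
  #include <linux/mm.h>     /* GFP_* priorities in kernels of this era */

  void example_use(void)
  {
    /* Served from the first general cache of size >= 300 bytes. */
    char *buf = kmalloc(300, GFP_KERNEL);

    if (!buf)
      return;
    /* ... use buf ... */
    kfree(buf);             /* kfree() finds the cache via the page */
  }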
37057 
37058 void
37059 kfree(const void *objp)
37060 {
37061   struct page *page;
37062   int     nr;
37063 
37064   if (!objp)
37065     goto null_ptr;
37066   nr = MAP_NR(objp);
37067   if (nr >= max_mapnr)
37068     goto bad_ptr;
37069 
37070   /* Assume we own the page structure - hence no locking.
37071    * If someone is misbehaving (for example, calling us
37072    * with a bad address), then access to the page
37073    * structure can race with the kmem_slab_destroy()
37074    * code.  Need to add a spin_lock to each page
37075    * structure, which would be useful in threading the
37076    * gfp() functions....  */
37077   page = &mem_map[nr];
37078   if (PageSlab(page)) {
37079     kmem_cache_t    *cachep;
37080 
37081     /* Here, we again assume the obj address is good.  If
37082      * it isn't, and happens to map onto another general
37083      * cache page which has no active objs, then we race.
37084      */
37085     cachep = SLAB_GET_PAGE_CACHE(page);
37086     if (cachep &&
37087         (cachep->c_flags & SLAB_CFLGS_GENERAL)) {
37088       __kmem_cache_free(cachep, objp);
37089       return;
37090     }
37091   }
37092 bad_ptr:
37093   printk(KERN_ERR "kfree: Bad obj %p\n", objp);
37094 
37095 #if 1
37096 /* FORCE A KERNEL DUMP WHEN THIS HAPPENS. SPEAK IN ALL
37097  * CAPS. GET THE CALL CHAIN. */
37098 *(int *) 0 = 0;
37099 #endif
37100 
37101 null_ptr:
37102   return;
37103 }
37104 
37105 void
37106 kfree_s(const void *objp, size_t size)
37107 {
37108   struct page *page;
37109   int     nr;
37110 
37111   if (!objp)
37112     goto null_ptr;
37113   nr = MAP_NR(objp);
37114   if (nr >= max_mapnr)
37115     goto null_ptr;
37116   /* See comment in kfree() */
37117   page = &mem_map[nr];
37118   if (PageSlab(page)) {
37119     kmem_cache_t    *cachep;
37120     /* See comment in kfree() */
37121     cachep = SLAB_GET_PAGE_CACHE(page);
37122     if (cachep && cachep->c_flags & SLAB_CFLGS_GENERAL) {
37123       if (size <= cachep->c_org_size) {
37124         /* XXX better check */
37125         __kmem_cache_free(cachep, objp);
37126         return;
37127       }
37128     }
37129   }
37130 null_ptr:
37131   printk(KERN_ERR "kfree_s: Bad obj %p\n", objp);
37132   return;
37133 }
37134 
37135 kmem_cache_t *
37136 kmem_find_general_cachep(size_t size)
37137 {
37138   cache_sizes_t   *csizep = cache_sizes;
37139 
37140   /* This function could be moved to the header file, and
37141    * made inline so consumers can quickly determine what
37142    * cache pointer they require.
37143    */
37144   for (; csizep->cs_size; csizep++) {
37145     if (size > csizep->cs_size)
37146       continue;
37147     break;
37148   }
37149   return csizep->cs_cachep;
37150 }
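
As the comment suggests, the intended consumer resolves the cache
pointer once and then allocates from it directly, skipping the table
walk that kmalloc() performs on every call.  A sketch of that pattern
(the names and the 128-byte size are hypothetical):

  #include <linux/slab.h>

  static kmem_cache_t *msg_cachep;    /* hypothetical consumer state */

  void msg_init(void)
  {
    /* Look up the general cache for objects of up to 128 bytes. */
    msg_cachep = kmem_find_general_cachep(128);
  }

  void *msg_alloc(int flags)
  {
    /* Equivalent to kmalloc(128, flags), minus the table walk. */
    return msg_cachep ? kmem_cache_alloc(msg_cachep, flags) : NULL;
  }

Objects obtained this way can still be released with kfree(), since
their pages map back to a general (SLAB_CFLGS_GENERAL) cache.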
37151 
37152 
37153 /* Called from try_to_free_page().
37154  * This function _cannot_ be called within an int, but it
37155  * can be interrupted.
37156  */
37157 void
37158 kmem_cache_reap(int gfp_mask)
37159 {
37160   kmem_slab_t     *slabp;
37161   kmem_cache_t    *searchp;
37162   kmem_cache_t    *best_cachep;
37163   unsigned int     scan;
37164   unsigned int     reap_level;
37165 
37166   if (in_interrupt()) {
37167     printk("kmem_cache_reap() called within int!\n");
37168     return;
37169   }
37170 
37171   /* We really need a test semaphore op so we can avoid
37172    * sleeping when !wait is true.  */
37173   down(&cache_chain_sem);
37174 
37175   scan = 10;
37176   reap_level = 0;
37177 
37178   best_cachep = NULL;
37179   searchp = clock_searchp;
37180   do {
37181     unsigned int    full_free;
37182     unsigned int    dma_flag;
37183 
37184     /* It's safe to test this without holding the
37185      * cache-lock. */
37186     if (searchp->c_flags & SLAB_NO_REAP)
37187       goto next;
37188     spin_lock_irq(&searchp->c_spinlock);
37189     if (searchp->c_growing)
37190       goto next_unlock;
37191     if (searchp->c_dflags & SLAB_CFLGS_GROWN) {
37192       searchp->c_dflags &= ~SLAB_CFLGS_GROWN;
37193       goto next_unlock;
37194     }
37195     /* Sanity check for corruption of static values. */
37196     if (searchp->c_inuse ||
37197         searchp->c_magic != SLAB_C_MAGIC) {
37198       spin_unlock_irq(&searchp->c_spinlock);
37199       printk(KERN_ERR "kmem_reap: Corrupted cache struct"
37200              " for %s\n", searchp->c_name);
37201       goto next;
37202     }
37203     dma_flag = 0;
37204     full_free = 0;
37205 
37206     /* Count the fully free slabs.  There should not be
37207      * many, since we are holding the cache lock.  */
37208     slabp = searchp->c_lastp;
37209     while (!slabp->s_inuse &&
37210            slabp != kmem_slab_end(searchp)) {
37211       slabp = slabp->s_prevp;
37212       full_free++;
37213       if (slabp->s_dma)
37214         dma_flag++;
37215     }
37216     spin_unlock_irq(&searchp->c_spinlock);
37217 
37218     if ((gfp_mask & GFP_DMA) && !dma_flag)
37219       goto next;
37220 
37221     if (full_free) {
37222       if (full_free >= 10) {
37223         best_cachep = searchp;
37224         break;
37225       }
37226 
37227       /* Try to avoid slabs with constructors and/or more
37228        * than one page per slab (as it can be difficult
37229        * to get high orders from gfp()).  */
37230       if (full_free >= reap_level) {
37231         reap_level = full_free;
37232         best_cachep = searchp;
37233       }
37234     }
37235     goto next;
37236 next_unlock:
37237     spin_unlock_irq(&searchp->c_spinlock);
37238 next:
37239     searchp = searchp->c_nextp;
37240   } while (--scan && searchp != clock_searchp);
37241 
37242   clock_searchp = searchp;
37243   up(&cache_chain_sem);
37244 
37245   if (!best_cachep) {
37246     /* couldn't find anything to reap */
37247     return;
37248   }
37249 
37250   spin_lock_irq(&best_cachep->c_spinlock);
37251   while (!best_cachep->c_growing &&
37252          !(slabp = best_cachep->c_lastp)->s_inuse &&
37253          slabp != kmem_slab_end(best_cachep)) {
37254     if (gfp_mask & GFP_DMA) {
37255       do {
37256         if (slabp->s_dma)
37257           goto good_dma;
37258         slabp = slabp->s_prevp;
37259       } while (!slabp->s_inuse &&
37260                slabp != kmem_slab_end(best_cachep));
37261 
37262       /* Didn't find a DMA slab (there was a free one -
37263        * it must have become active).  */
37264       goto dma_fail;
37265 good_dma:
37266     }
37267     if (slabp == best_cachep->c_freep)
37268       best_cachep->c_freep = slabp->s_nextp;
37269     kmem_slab_unlink(slabp);
37270     SLAB_STATS_INC_REAPED(best_cachep);
37271 
37272     /* Safe to drop the lock.  The slab is no longer
37273      * linked to the cache.  */
37274     spin_unlock_irq(&best_cachep->c_spinlock);
37275     kmem_slab_destroy(best_cachep, slabp);
37276     spin_lock_irq(&best_cachep->c_spinlock);
37277   }
37278 dma_fail:
37279   spin_unlock_irq(&best_cachep->c_spinlock);
37280   return;
37281 }
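
The selection logic above is compact but dense.  A summary of the
policy, drawn only from the code of this function (it is not part of
the original source):

  /*
   * 1. Starting at the clock hand (clock_searchp), examine at most 10
   *    caches, skipping any that are SLAB_NO_REAP, currently growing,
   *    or grown since the last pass (SLAB_CFLGS_GROWN).
   * 2. For each candidate, count the fully free slabs at the tail of
   *    its slab list (and note whether any are DMA slabs when GFP_DMA
   *    is requested).
   * 3. A cache with at least 10 free slabs is taken immediately;
   *    otherwise the cache with the most free slabs seen so far
   *    (reap_level) wins.
   * 4. The winner's fully free slabs are unlinked and destroyed one at
   *    a time with kmem_slab_destroy(), dropping the cache lock around
   *    each destroy and stopping if the cache starts growing.
   */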
37282 
37283 #if     SLAB_SELFTEST
37284 /* A few v. simple tests */
37285 static void
37286 kmem_self_test(void)
37287 {
37288   kmem_cache_t    *test_cachep;
37289 
37290   printk(KERN_INFO "kmem_test() - start\n");
37291   test_cachep =
37292     kmem_cache_create("test-cachep", 16, 0,
37293                       SLAB_RED_ZONE|SLAB_POISON,
37294                       NULL, NULL);
37295   if (test_cachep) {
37296     char *objp =
37297       kmem_cache_alloc(test_cachep, SLAB_KERNEL);
37298     if (objp) {
37299       /* Write in front and past end, red-zone test. */
37300       *(objp-1) = 1;
37301       *(objp+16) = 1;
37302       kmem_cache_free(test_cachep, objp);
37303 
37304       /* Mess up poisoning. */
37305       *objp = 10;
37306       objp = kmem_cache_alloc(test_cachep, SLAB_KERNEL);
37307       kmem_cache_free(test_cachep, objp);
37308 
37309       /* Mess up poisoning (again). */
37310       *objp = 10;
37311       kmem_cache_shrink(test_cachep);
37312     }
37313   }
37314   printk(KERN_INFO "kmem_test() - finished\n");
37315 }
37316 #endif  /* SLAB_SELFTEST */
37317 
37318 #if     defined(CONFIG_PROC_FS)
37319 /* /proc/slabinfo
37320  * cache-name num-active-objs total-objs num-active-slabs
37321  * ... total-slabs num-pages-per-slab
37322  */
37323 int
37324 get_slabinfo(char *buf)
37325 {
37326   kmem_cache_t    *cachep;
37327   kmem_slab_t     *slabp;
37328   unsigned long   active_objs;
37329   unsigned long   save_flags;
37330   unsigned long   num_slabs;
37331   unsigned long   num_objs;
37332   int             len=0;
37333 #if     SLAB_STATS
37334   unsigned long   active_slabs;
37335 #endif  /* SLAB_STATS */
37336 
37337   __save_flags(save_flags);
37338 
37339   /* Output format version, so at least we can change it
37340    * without _too_ many complaints.  */
37341 #if     SLAB_STATS
37342   len = sprintf(buf,
37343                "slabinfo - version: 1.0 (statistics)\n");
37344 #else
37345   len = sprintf(buf, "slabinfo - version: 1.0\n");
37346 #endif  /* SLAB_STATS */
37347   down(&cache_chain_sem);
37348   cachep = &cache_cache;
37349   do {
37350 #if     SLAB_STATS
37351     active_slabs = 0;
37352 #endif  /* SLAB_STATS */
37353     num_slabs = active_objs = 0;
37354     spin_lock_irq(&cachep->c_spinlock);
37355     for (slabp = cachep->c_firstp;
37356          slabp != kmem_slab_end(cachep);
37357          slabp = slabp->s_nextp) {
37358       active_objs += slabp->s_inuse;
37359       num_slabs++;
37360 #if     SLAB_STATS
37361       if (slabp->s_inuse)
37362         active_slabs++;
37363 #endif  /* SLAB_STATS */
37364     }
37365     num_objs = cachep->c_num*num_slabs;
37366 #if     SLAB_STATS
37367     {
37368       unsigned long errors;
37369       unsigned long high = cachep->c_high_mark;
37370       unsigned long grown = cachep->c_grown;
37371       unsigned long reaped = cachep->c_reaped;
37372       unsigned long allocs = cachep->c_num_allocations;
37373       errors =
37374         (unsigned long) atomic_read(&cachep->c_errors);
37375       spin_unlock_irqrestore(&cachep->c_spinlock,
37376                              save_flags);
37377       len += sprintf(buf+len,
37378                      "%-16s %6lu %6lu %4lu %4lu %4lu "
37379                      "%6lu %7lu %5lu %4lu %4lu\n",
37380                      cachep->c_name, active_objs,
37381                      num_objs, active_slabs, num_slabs,
37382                      (1<<cachep->c_gfporder)*num_slabs,
37383                      high, allocs, grown, reaped,errors);
37384     }
37385 #else
37386     spin_unlock_irqrestore(&cachep->c_spinlock,
37387                            save_flags);
37388     len += sprintf(buf+len, "%-17s %6lu %6lu\n",
37389                    cachep->c_name, active_objs,num_objs);
37390 #endif  /* SLAB_STATS */
37391   } while ((cachep = cachep->c_nextp) != &cache_cache);
37392   up(&cache_chain_sem);
37393 
37394   return len;
37395 }
37396 #endif  /* CONFIG_PROC_FS */
