mm/page_alloc.c
34498 /*
34499 * linux/mm/page_alloc.c
34500 *
34501 * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
34502 * Swap reorganised 29.12.95, Stephen Tweedie
34503 */
34504
34505 #include <linux/config.h>
34506 #include <linux/mm.h>
34507 #include <linux/kernel_stat.h>
34508 #include <linux/swap.h>
34509 #include <linux/swapctl.h>
34510 #include <linux/interrupt.h>
34511 #include <linux/init.h>
34512 #include <linux/pagemap.h>
34513
34514 #include <asm/dma.h>
34515 #include <asm/uaccess.h> /* for copy_to/from_user */
34516 #include <asm/pgtable.h>
34517
34518 int nr_swap_pages = 0;
34519 int nr_free_pages = 0;
34520
34521 /* Free area management
34522 *
34523 * The free_area array holds the queue heads of the
34524 * free areas of different sizes */
34525
34526 #if CONFIG_AP1000
34527 /* the AP+ needs to allocate 8MB contiguous, aligned
34528 * chunks of ram for the ring buffers */
34529 #define NR_MEM_LISTS 12
34530 #else
34531 #define NR_MEM_LISTS 6
34532 #endif
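The top order is NR_MEM_LISTS - 1, so the largest contiguous allocation the buddy system can hand out is PAGE_SIZE << (NR_MEM_LISTS - 1). Assuming 4KB pages, 6 lists give a 128KB maximum (4KB << 5); the AP1000's 12 lists raise that to 4KB << 11 = 8MB, which is exactly the aligned chunk its ring buffers need.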
34533
34534 /* The start of this MUST match the start of "struct
34535 * page" */
34536 struct free_area_struct {
34537 struct page *next;
34538 struct page *prev;
34539 unsigned int * map;
34540 };
34541
34542 #define memory_head(x) ((struct page *)(x))
34543
34544 static struct free_area_struct free_area[NR_MEM_LISTS];
34545
34546 static inline void init_mem_queue(
34547 struct free_area_struct * head)
34548 {
34549 head->next = memory_head(head);
34550 head->prev = memory_head(head);
34551 }
34552
34553 static inline void add_mem_queue(
34554 struct free_area_struct * head, struct page * entry)
34555 {
34556 struct page * next = head->next;
34557
34558 entry->prev = memory_head(head);
34559 entry->next = next;
34560 next->prev = entry;
34561 head->next = entry;
34562 }
34563
34564 static inline void remove_mem_queue(struct page * entry)
34565 {
34566 struct page * next = entry->next;
34567 struct page * prev = entry->prev;
34568 next->prev = prev;
34569 prev->next = next;
34570 }
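The three queue helpers above implement a circular doubly-linked list in which the list head itself acts as a sentinel node: memory_head() casts the free_area_struct to a struct page *, which is legal only because the structure is laid out to match the start of struct page (hence the "MUST match" comment). A minimal standalone sketch of the same sentinel trick, with hypothetical type and function names:

#include <stdio.h>

/* Stand-ins for struct page and struct free_area_struct: the head's
 * first two members line up with a node's, so the head can be cast
 * to a node and used as the list's sentinel. */
struct node { struct node *next, *prev; int data; };
struct head { struct node *next, *prev; };

#define list_head(h) ((struct node *)(h))

static void init_queue(struct head *h)
{
	h->next = list_head(h);
	h->prev = list_head(h);
}

static void add_queue(struct head *h, struct node *entry)
{
	struct node *next = h->next;

	entry->prev = list_head(h);
	entry->next = next;
	next->prev = entry;
	h->next = entry;
}

int main(void)
{
	struct head q;
	struct node a = { 0, 0, 1 }, b = { 0, 0, 2 };
	struct node *p;

	init_queue(&q);
	add_queue(&q, &a);
	add_queue(&q, &b);

	/* Walk until we come back around to the sentinel. */
	for (p = q.next; p != list_head(&q); p = p->next)
		printf("%d\n", p->data);
	return 0;
}

Because entries are always added right after the head, the most recently freed block is handed out again first; the walk above prints 2, then 1.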
34571
34572 /* Free_page() adds the page to the free lists. This is
34573 * optimized for fast normal cases (no error jumps taken
34574 * normally).
34575 *
34576 * The way to optimize jumps for gcc-2.2.2 is to:
34577 * - select the "normal" case and put it inside the
34578 * if () { XXX }
34579 * - no else-statements if you can avoid them
34580 *
34581 * With the above two rules, you get a straight-line
34582 * execution path for the normal case, giving better
34583 * asm-code. */
34584
34585 /* Buddy system. Hairy. You really aren't expected to
34586 * understand this
34587 *
34588 * Hint: -mask = 1+~mask */
34589 spinlock_t page_alloc_lock = SPIN_LOCK_UNLOCKED;
34590
34591 static inline void free_pages_ok(unsigned long map_nr,
34592 unsigned long order)
34593 {
34594 struct free_area_struct *area = free_area + order;
34595 unsigned long index = map_nr >> (1 + order);
34596 unsigned long mask = (~0UL) << order;
34597 unsigned long flags;
34598
34599 spin_lock_irqsave(&page_alloc_lock, flags);
34600
34601 #define list(x) (mem_map+(x))
34602
34603 map_nr &= mask;
34604 nr_free_pages -= mask;
34605 while (mask + (1 << (NR_MEM_LISTS-1))) {
34606 if (!test_and_change_bit(index, area->map))
34607 break;
34608 remove_mem_queue(list(map_nr ^ -mask));
34609 mask <<= 1;
34610 area++;
34611 index >>= 1;
34612 map_nr &= mask;
34613 }
34614 add_mem_queue(area, list(map_nr));
34615
34616 #undef list
34617
34618 spin_unlock_irqrestore(&page_alloc_lock, flags);
34619 }
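The "-mask = 1+~mask" hint is just the two's-complement negation identity. With mask = ~0UL << order, -mask equals 1UL << order, so "nr_free_pages -= mask" actually adds 2^order pages to the free count, and "map_nr ^ -mask" flips bit 'order' of the block number, which is precisely the index of the block's buddy. test_and_change_bit() toggles the pair's bit in that order's bitmap: if the buddy was also free, the two blocks merge and the loop climbs to the next order; the while condition reaches zero once mask equals -(1 << (NR_MEM_LISTS-1)), i.e. at the top order. A small standalone sketch of the buddy-index arithmetic (hypothetical helper and page numbers):

#include <stdio.h>

/* Buddy of the block starting at page 'map_nr' at a given order:
 * flip bit 'order' of the block number.  free_pages_ok() computes
 * the same value as map_nr ^ -mask, with mask = ~0UL << order. */
static unsigned long buddy_of(unsigned long map_nr, unsigned long order)
{
	unsigned long mask = (~0UL) << order;
	return map_nr ^ -mask;		/* -mask == 1UL << order */
}

int main(void)
{
	/* Freeing order-0 page 12: its buddy is page 13; if that is
	 * free too, the pair becomes an order-1 block at page 12,
	 * whose buddy is the order-1 block at page 14, and so on. */
	printf("%lu\n", buddy_of(12, 0));	/* 13 */
	printf("%lu\n", buddy_of(12, 1));	/* 14 */
	printf("%lu\n", buddy_of(12, 2));	/*  8 */
	return 0;
}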
34620
34621 void __free_page(struct page *page)
34622 {
34623 if (!PageReserved(page) &&
34624 atomic_dec_and_test(&page->count)) {
34625 if (PageSwapCache(page))
34626 panic ("Freeing swap cache page");
34627 page->flags &= ~(1 << PG_referenced);
34628 free_pages_ok(page - mem_map, 0);
34629 return;
34630 }
34631 }
34632
34633 void free_pages(unsigned long addr, unsigned long order)
34634 {
34635 unsigned long map_nr = MAP_NR(addr);
34636
34637 if (map_nr < max_mapnr) {
34638 mem_map_t * map = mem_map + map_nr;
34639 if (PageReserved(map))
34640 return;
34641 if (atomic_dec_and_test(&map->count)) {
34642 if (PageSwapCache(map))
34643 panic ("Freeing swap cache pages");
34644 map->flags &= ~(1 << PG_referenced);
34645 free_pages_ok(map_nr, order);
34646 return;
34647 }
34648 }
34649 }
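__free_page() drops a reference on a struct page and returns it to the buddy lists only when the count reaches zero and the page is not reserved; free_pages() does the same starting from a kernel virtual address and an order, translating the address to a map number with MAP_NR(). A usage sketch (assuming a GFP_KERNEL allocation context; the order must match the one used at allocation time):

/* Allocate four contiguous pages, use them, then free them. */
unsigned long addr = __get_free_pages(GFP_KERNEL, 2);	/* order 2 = 4 pages */
if (addr) {
	/* ... use the 4*PAGE_SIZE region at 'addr' ... */
	free_pages(addr, 2);
}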
34650
34651 /* Some ugly macros to speed up __get_free_pages().. */
34652 #define MARK_USED(index, order, area) \
34653 change_bit((index) >> (1+(order)), (area)->map)
34654 #define CAN_DMA(x) (PageDMA(x))
34655 #define ADDRESS(x) (PAGE_OFFSET + ((x) << PAGE_SHIFT))
34656 #define RMQUEUE(order, gfp_mask) \
34657 do { \
34658 struct free_area_struct * area = free_area+order; \
34659 unsigned long new_order = order; \
34660 do { \
34661 struct page *prev = memory_head(area), \
34662 *ret = prev->next; \
34663 while (memory_head(area) != ret) { \
34664 if (!(gfp_mask & __GFP_DMA) || CAN_DMA(ret)) { \
34665 unsigned long map_nr; \
34666 (prev->next = ret->next)->prev = prev; \
34667 map_nr = ret - mem_map; \
34668 MARK_USED(map_nr, new_order, area); \
34669 nr_free_pages -= 1 << order; \
34670 EXPAND(ret, map_nr, order, new_order, area); \
34671 spin_unlock_irqrestore(&page_alloc_lock, flags);\
34672 return ADDRESS(map_nr); \
34673 } \
34674 prev = ret; \
34675 ret = ret->next; \
34676 } \
34677 new_order++; area++; \
34678 } while (new_order < NR_MEM_LISTS); \
34679 } while (0)
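RMQUEUE() is a macro rather than a function so that its "return ADDRESS(map_nr)" drops straight out of __get_free_pages() with the block already unlinked and the lock released. It scans the free list of the requested order and, if that list is empty (or holds only non-DMA pages when __GFP_DMA is set), moves up to the next larger order. A rough, simplified functional equivalent (hypothetical helper name; locking, accounting and the EXPAND step elided):

/* Sketch only: find the first suitable block at 'order' or above,
 * or return NULL if every list up to NR_MEM_LISTS comes up empty. */
static struct page *rmqueue_sketch(unsigned long order, int gfp_mask)
{
	struct free_area_struct *area = free_area + order;
	unsigned long cur = order;

	do {
		struct page *ret;

		for (ret = area->next; ret != memory_head(area); ret = ret->next) {
			if ((gfp_mask & __GFP_DMA) && !CAN_DMA(ret))
				continue;	/* caller needs DMA-capable memory */
			remove_mem_queue(ret);	/* unlink from the free list */
			return ret;		/* the real code EXPANDs here */
		}
		cur++;
		area++;
	} while (cur < NR_MEM_LISTS);
	return NULL;
}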
34680
34681 #define EXPAND(map,index,low,high,area) \
34682 do { \
34683 unsigned long size = 1 << high; \
34684 while (high > low) { \
34685 area--; high--; size >>= 1; \
34686 add_mem_queue(area, map); \
34687 MARK_USED(index, high, area); \
34688 index += size; \
34689 map += size; \
34690 } \
34691 atomic_set(&map->count, 1); \
34692 } while (0)
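EXPAND() deals with the found block being larger than requested: each pass through its loop halves the block, puts the lower half back on the next-smaller free list (flipping that pair's bit with MARK_USED), and keeps splitting the upper half until it is down to the requested order, at which point the surviving piece gets a use count of 1. A worked trace with hypothetical numbers, for an order-0 request satisfied from an order-3 block starting at page 64:

    high 3 -> 2: pages 64-67 go back on the order-2 list
    high 2 -> 1: pages 68-69 go back on the order-1 list
    high 1 -> 0: page 70 goes back on the order-0 list

Page 71 is what the caller receives: because EXPAND is a macro, it advances map_nr itself as it splits, so RMQUEUE's ADDRESS(map_nr) ends up pointing at that last page.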
34693
34694 int low_on_memory = 0;
34695
34696 unsigned long __get_free_pages(int gfp_mask,
34697 unsigned long order)
34698 {
34699 unsigned long flags;
34700
34701 if (order >= NR_MEM_LISTS)
34702 goto nopage;
34703
34704 #ifdef ATOMIC_MEMORY_DEBUGGING
34705 if ((gfp_mask & __GFP_WAIT) && in_interrupt()) {
34706 static int count = 0;
34707 if (++count < 5) {
34708 printk("gfp called nonatomically from interrupt "
34709 "%p\n", __builtin_return_address(0));
34710 }
34711 goto nopage;
34712 }
34713 #endif
34714
34715 /* If this is a recursive call, we'd better do our best
34716 * to just allocate things without further thought. */
34717 if (!(current->flags & PF_MEMALLOC)) {
34718 int freed;
34719
34720 if (nr_free_pages > freepages.min) {
34721 if (!low_on_memory)
34722 goto ok_to_allocate;
34723 if (nr_free_pages >= freepages.high) {
34724 low_on_memory = 0;
34725 goto ok_to_allocate;
34726 }
34727 }
34728
34729 low_on_memory = 1;
34730 current->flags |= PF_MEMALLOC;
34731 freed = try_to_free_pages(gfp_mask);
34732 current->flags &= ~PF_MEMALLOC;
34733
34734 if (!freed && !(gfp_mask & (__GFP_MED | __GFP_HIGH)))
34735 goto nopage;
34736 }
34737 ok_to_allocate:
34738 spin_lock_irqsave(&page_alloc_lock, flags);
34739 RMQUEUE(order, gfp_mask);
34740 spin_unlock_irqrestore(&page_alloc_lock, flags);
34741
34742 /* If we can schedule, do so, and make sure to yield.
34743 * We may be a real-time process, and if kswapd is
34744 * waiting for us we need to allow it to run a bit. */
34745 if (gfp_mask & __GFP_WAIT) {
34746 current->policy |= SCHED_YIELD;
34747 schedule();
34748 }
34749
34750 nopage:
34751 return 0;
34752 }
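The watermark test at the top of __get_free_pages() implements hysteresis: once nr_free_pages drops to freepages.min, low_on_memory is set and every subsequent allocation keeps calling try_to_free_pages() until the free count climbs back above freepages.high, not merely above min. Even when nothing could be freed, __GFP_MED and __GFP_HIGH requests are still allowed to fall through and try the free lists. A simplified rendering of just the watermark decision (hypothetical helper name):

/* Returns nonzero when the caller may allocate without first
 * trying to free pages; mirrors the logic above. */
static int can_allocate_directly(void)
{
	if (nr_free_pages > freepages.min) {
		if (!low_on_memory)
			return 1;
		if (nr_free_pages >= freepages.high) {
			low_on_memory = 0;	/* enough slack again */
			return 1;
		}
	}
	low_on_memory = 1;
	return 0;
}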
34753
34754 /* Show free area list (used inside shift_scroll-lock
34755 * stuff). We also calculate the percentage
34756 * fragmentation. We do this by counting the memory on
34757 * each free list with the exception of the first item on
34758 * the list. */
34759 void show_free_areas(void)
34760 {
34761 unsigned long order, flags;
34762 unsigned long total = 0;
34763
34764 printk("Free pages: %6dkB\n ( ",
34765 nr_free_pages<<(PAGE_SHIFT-10));
34766 printk("Free: %d (%d %d %d)\n",
34767 nr_free_pages,
34768 freepages.min,
34769 freepages.low,
34770 freepages.high);
34771 spin_lock_irqsave(&page_alloc_lock, flags);
34772 for (order=0 ; order < NR_MEM_LISTS; order++) {
34773 struct page * tmp;
34774 unsigned long nr = 0;
34775 for (tmp = free_area[order].next ;
34776 tmp != memory_head(free_area+order) ;
34777 tmp = tmp->next) {
34778 nr ++;
34779 }
34780 total += nr * ((PAGE_SIZE>>10) << order);
34781 printk("%lu*%lukB ", nr,
34782 (unsigned long)((PAGE_SIZE>>10) << order));
34783 }
34784 spin_unlock_irqrestore(&page_alloc_lock, flags);
34785 printk("= %lukB)\n", total);
34786 #ifdef SWAP_CACHE_INFO
34787 show_swap_cache_info();
34788 #endif
34789 }
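Each pass through the loop counts the entries on one order's free list and prints a "count*sizekB" term, so (assuming 4KB pages and NR_MEM_LISTS = 6) the tail of the report looks something like the hypothetical line below; the total is the sum of count * (4 << order) kB over all orders:

    118*4kB 41*8kB 12*16kB 5*32kB 1*64kB 0*128kB = 1216kB)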
34790
34791 #define LONG_ALIGN(x) \
34792 (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
34793
34794 /* set up the free-area data structures:
34795 * - mark all pages reserved
34796 * - mark all memory queues empty
34797 * - clear the memory bitmaps */
34798 unsigned long __init free_area_init(
34799 unsigned long start_mem, unsigned long end_mem)
34800 {
34801 mem_map_t * p;
34802 unsigned long mask = PAGE_MASK;
34803 unsigned long i;
34804
34805 /* Select nr of pages we try to keep free for important
34806 * stuff with a minimum of 10 pages and a maximum of
34807 * 256 pages, so that we don't waste too much memory on
34808 * large systems. This is fairly arbitrary, but based
34809 * on some behaviour analysis. */
34810 i = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT+7);
34811 if (i < 10)
34812 i = 10;
34813 if (i > 256)
34814 i = 256;
34815 freepages.min = i;
34816 freepages.low = i * 2;
34817 freepages.high = i * 3;
34818 mem_map = (mem_map_t *) LONG_ALIGN(start_mem);
34819 p = mem_map + MAP_NR(end_mem);
34820 start_mem = LONG_ALIGN((unsigned long) p);
34821 memset(mem_map, 0,
34822 start_mem - (unsigned long) mem_map);
34823 do {
34824 --p;
34825 atomic_set(&p->count, 0);
34826 p->flags = (1 << PG_DMA) | (1 << PG_reserved);
34827 } while (p > mem_map);
34828
34829 for (i = 0 ; i < NR_MEM_LISTS ; i++) {
34830 unsigned long bitmap_size;
34831 init_mem_queue(free_area+i);
34832 mask += mask;
34833 end_mem = (end_mem + ~mask) & mask;
34834 bitmap_size =
34835 (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT + i);
34836 bitmap_size = (bitmap_size + 7) >> 3;
34837 bitmap_size = LONG_ALIGN(bitmap_size);
34838 free_area[i].map = (unsigned int *) start_mem;
34839 memset((void *) start_mem, 0, bitmap_size);
34840 start_mem += bitmap_size;
34841 }
34842 return start_mem;
34843 }
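The watermarks scale with the amount of managed memory: i is the page count divided by 128, clamped to the range 10..256. On a hypothetical 64MB machine with 4KB pages that is 16384 >> 7 = 128, giving freepages.min = 128 pages (512KB), low = 256 pages (1MB) and high = 384 pages (1.5MB). The per-order bitmaps then follow: for order i, end_mem is rounded up to an order-i boundary, the managed range is divided by 2^(PAGE_SHIFT + i) to get a bit count, and that is converted to long-aligned bytes. On the same 64MB example the order-0 bitmap is 16384 bits = 2048 bytes, halving for each successive order.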
34844
34845 /* Primitive swap readahead code. We simply read an
34846 * aligned block of (1 << page_cluster) entries in the
34847 * swap area. This method is chosen because it doesn't
34848 * cost us any seek time. We also make sure to queue the
34849 * 'original' request together with the readahead ones...
34850 */
34851 void swapin_readahead(unsigned long entry)
34852 {
34853 int i;
34854 struct page *new_page;
34855 unsigned long offset = SWP_OFFSET(entry);
34856 struct swap_info_struct *swapdev =
34857 SWP_TYPE(entry) + swap_info;
34858
34859 offset = (offset >> page_cluster) << page_cluster;
34860
34861 i = 1 << page_cluster;
34862 do {
34863 /* Don't read-ahead past the end of the swap area */
34864 if (offset >= swapdev->max)
34865 break;
34866 /* Don't block on I/O for read-ahead */
34867 if (atomic_read(&nr_async_pages) >=
34868 pager_daemon.swap_cluster)
34869 break;
34870 /* Don't read in bad or busy pages */
34871 if (!swapdev->swap_map[offset])
34872 break;
34873 if (swapdev->swap_map[offset] == SWAP_MAP_BAD)
34874 break;
34875 if (test_bit(offset, swapdev->swap_lockmap))
34876 break;
34877
34878 /* Ok, do the async read-ahead now */
34879 new_page =
34880 read_swap_cache_async(SWP_ENTRY(SWP_TYPE(entry),
34881 offset), 0);
34882 if (new_page != NULL)
34883 __free_page(new_page);
34884 offset++;
34885 } while (--i);
34886 return;
34887 }
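The shift pair "(offset >> page_cluster) << page_cluster" rounds the faulting offset down to a multiple of 2^page_cluster, so the readahead window is a fixed, aligned cluster of swap entries rather than a window centred on the fault. With a hypothetical page_cluster of 4, a fault on swap offset 37 rounds down to 32 and the loop attempts entries 32 through 47, stopping early at the end of the swap area, at an unused or bad entry, at a locked entry, or when too many asynchronous pages are already in flight. The __free_page() call simply drops the reference returned by read_swap_cache_async(), leaving the freshly read page held by the swap cache for the fault handler to find.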
34888
34889 /* The tests may look silly, but it essentially makes
34890 * sure that no other process did a swap-in on us just as
34891 * we were waiting.
34892 *
34893 * Also, don't bother to add to the swap cache if this
34894 * page-in was due to a write access. */
34895 void swap_in(struct task_struct * tsk,
34896 struct vm_area_struct * vma, pte_t * page_table,
34897 unsigned long entry, int write_access)
34898 {
34899 unsigned long page;
34900 struct page *page_map = lookup_swap_cache(entry);
34901
34902 if (!page_map) {
34903 swapin_readahead(entry);
34904 page_map = read_swap_cache(entry);
34905 }
34906 if (pte_val(*page_table) != entry) {
34907 if (page_map)
34908 free_page_and_swap_cache(page_address(page_map));
34909 return;
34910 }
34911 if (!page_map) {
34912 set_pte(page_table, BAD_PAGE);
34913 swap_free(entry);
34914 oom(tsk);
34915 return;
34916 }
34917
34918 page = page_address(page_map);
34919 vma->vm_mm->rss++;
34920 tsk->min_flt++;
34921 swap_free(entry);
34922
34923 if (!write_access || is_page_shared(page_map)) {
34924 set_pte(page_table, mk_pte(page, vma->vm_page_prot));
34925 return;
34926 }
34927
34928 /* The page is unshared, and we want write access. In
34929 * this case, it is safe to tear down the swap cache
34930 * and give the page over entirely to this process. */
34931 if (PageSwapCache(page_map))
34932 delete_from_swap_cache(page_map);
34933 set_pte(page_table,
34934 pte_mkwrite(pte_mkdirty(mk_pte(page,
34935 vma->vm_page_prot))));
34936 return;
34937 }
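After the swap-cache lookup (and, on a miss, the readahead plus synchronous read), four outcomes are possible:

- the PTE no longer holds the swap entry: another process swapped the page in while we slept, so any page we obtained is released along with its swap-cache reference and the function returns;
- no page could be read at all: the PTE is pointed at BAD_PAGE, the swap entry is freed, and oom() is called;
- a read fault, or a page that is still shared: the page is mapped with the vma's protections but left clean and in the swap cache, so a later private write is handled by the normal copy-on-write path;
- a write fault on an unshared page: the swap-cache entry is torn down and the PTE is installed writable and dirty, handing the page over entirely to this process.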